// Documentation: https://github.com/hooke007/MPV_lazy/wiki/4_GLSL

// CuNNy 3x12 SOFT
// Copyright (c) 2024 funnyplanter

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3.0 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program.  If not, see <https://www.gnu.org/licenses/>.
/* ------------------------------------------------------------------- */


//!DESC [CuNNy_3x12_SOFT] -in
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND LUMA
//!SAVE in
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Precision selection: request the float16 arithmetic extension and, when its
// macro is defined, alias V4/M4/F to 16-bit types; otherwise alias them to the
// 32-bit vec4/mat4/float. M4 is defined here but not referenced by this pass.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0(x, y): red channel of LUMA at pos + (x, y), with the coordinate clamped to
// [0, sz], multiplied by LUMA_mul and converted to F. `pos` and `sz` must be in
// scope where the macro is expanded. The `* ivec2(1, 1) + ivec2(0, 0)` terms
// leave the coordinate unchanged.
#define l0(x, y) F((LUMA_mul * texelFetch(LUMA_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(1, 1) + ivec2(0, 0), 0)).r)
// Workgroup-shared cache of l0 values: a single 10x10 plane that hook() fills
// and then reads back as 3x3 windows.
shared F G[1][10][10];
// Entry point of the `-in` pass. Each invocation gathers the 3x3 window of l0
// samples centred on its own position, accumulates constant weight vectors
// scaled by those samples into three 4-component results, adds a constant
// offset to each, clamps to [0, 1], and stores them in three horizontally
// adjacent texels of out_image.
void hook() {
	// xy:   coordinate of this invocation inside its workgroup.
	// pos:  source coordinate (workgroup id * 8 + xy).
	// opos: output coordinate; x is tripled because three texels are written
	//       per source position.
	// sz:   largest valid source coordinate, used by l0 for clamping.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(3, 1);
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Fill the 10x10 shared tile. Each invocation writes the entries at offsets
	// 0 and 8 from its own xy on each axis, skipping indices >= 10, so it
	// writes at most four entries. Entry [a] holds the sample for source
	// coordinate (workgroup origin + a - 1), i.e. the tile carries a one-texel
	// border around the 8x8 block.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
		}
	}
	// Wait for every invocation of the workgroup to reach this point before
	// any of them reads the tile.
	barrier();
	// s0_R_C is the sample at row R, column C of the 3x3 window; s0_1_1 is the
	// sample for this invocation's own pos.
	F s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2];
	// For each window sample: scale one constant 4-vector per accumulator by
	// the scalar sample and add it to r0, r1 and r2.
	r0 += V4(4.017e-03, 2.163e-02, -1.002e-02, -8.245e-02) * s0_0_0;
	r1 += V4(1.533e-02, -1.412e-02, -2.095e-02, -1.456e-02) * s0_0_0;
	r2 += V4(1.090e+00, 3.718e-01, 2.530e-02, -5.853e-02) * s0_0_0;
	r0 += V4(-3.021e-02, -7.030e-01, 1.833e-02, 8.436e-01) * s0_0_1;
	r1 += V4(-3.648e-03, -2.184e-02, -4.637e-01, -2.332e-02) * s0_0_1;
	r2 += V4(-1.048e+00, -3.455e-01, -5.261e-02, 4.802e-02) * s0_0_1;
	r0 += V4(2.722e-02, -1.899e-01, -5.562e-03, 2.338e-01) * s0_0_2;
	r1 += V4(-8.591e-02, 1.116e-02, -1.985e-01, -3.813e-03) * s0_0_2;
	r2 += V4(-4.699e-02, -2.802e-02, -1.862e-03, 1.403e-02) * s0_0_2;
	r0 += V4(-1.972e-02, 7.442e-02, 1.767e-02, -9.474e-02) * s0_1_0;
	r1 += V4(-2.740e-02, -9.590e-01, 7.886e-02, -6.213e-02) * s0_1_0;
	r2 += V4(8.192e-02, 3.845e-01, -2.209e-02, 4.713e-02) * s0_1_0;
	r0 += V4(-1.035e+00, 7.204e-01, 9.750e-01, -7.910e-01) * s0_1_1;
	r1 += V4(9.423e-01, -2.806e-02, 6.709e-01, 2.061e-01) * s0_1_1;
	r2 += V4(-8.376e-02, -3.271e-01, 5.770e-01, -8.823e-01) * s0_1_1;
	r0 += V4(1.048e+00, 7.322e-02, 2.703e-02, -1.008e-01) * s0_1_2;
	r1 += V4(-6.815e-01, 5.902e-05, 4.419e-02, -3.081e-02) * s0_1_2;
	r2 += V4(2.090e-03, -8.177e-02, -5.619e-01, 2.484e-01) * s0_1_2;
	r0 += V4(1.493e-02, -9.497e-02, -8.468e-03, 1.728e-01) * s0_2_0;
	r1 += V4(1.188e-02, 9.664e-01, -8.566e-02, 7.456e-02) * s0_2_0;
	r2 += V4(6.395e-03, -9.458e-02, -9.220e-04, 5.109e-02) * s0_2_0;
	r0 += V4(-2.741e-02, 4.229e-02, -9.902e-01, -9.435e-02) * s0_2_1;
	r1 += V4(-1.014e-01, 6.398e-02, -4.228e-02, -1.213e-01) * s0_2_1;
	r2 += V4(-1.364e-02, -4.071e-02, 1.941e-02, 4.013e-01) * s0_2_1;
	r0 += V4(1.677e-02, 5.994e-02, -2.648e-02, -8.944e-02) * s0_2_2;
	r1 += V4(-6.971e-02, -1.948e-02, -9.674e-03, -1.702e-02) * s0_2_2;
	r2 += V4(7.142e-03, 1.108e-01, -2.878e-02, -8.178e-03) * s0_2_2;
	// Per accumulator: add a constant offset vector, clamp every component to
	// [0, 1], and store as 32-bit vec4 at output x offsets 0, 1 and 2.
	r0 += V4(2.815e-03, 5.054e-03, 6.748e-05, -5.780e-03);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(5.381e-03, 1.745e-03, -3.588e-03, 2.850e-02);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(6.194e-07, -5.462e-03, -3.649e-03, 3.135e-04);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_3x12_SOFT] -conv1
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND in
//!BIND LUMA
//!SAVE conv1
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Precision selection: request the float16 arithmetic extension and, when its
// macro is defined, alias V4/M4/F to 16-bit types; otherwise alias them to the
// 32-bit vec4/mat4/float.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): the three texels of `in` that belong to source position
// pos + (x, y). The position is clamped to [0, sz] first; its x is then
// multiplied by 3 and offset by 0, 1 or 2. The fetched texel is multiplied by
// in_mul and converted to V4. `pos` and `sz` must be in scope where the macros
// are expanded.
#define l0(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup-shared cache: three 10x10 planes of V4, one per macro above.
shared V4 G[3][10][10];
// Entry point of the `-conv1` pass. Each invocation gathers, for each of the
// three planes cached in G, the 3x3 window of V4 samples centred on its own
// position, multiplies every sample by one constant 4x4 matrix per accumulator
// and sums the products into r0, r1 and r2. The accumulators are clamped to
// [0, 1] and stored in three horizontally adjacent texels of out_image.
void hook() {
	// xy:   coordinate of this invocation inside its workgroup.
	// pos:  source coordinate (workgroup id * 8 + xy).
	// opos: output coordinate; x is tripled because three texels are written
	//       per source position.
	// sz:   largest valid source coordinate (from LUMA_size); l0/l1/l2 clamp
	//       against it before scaling x by 3.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(3, 1);
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Fill the three 10x10 shared planes. Each invocation writes the entries at
	// offsets 0 and 8 from its own xy on each axis, skipping indices >= 10.
	// Entry [a] holds the sample for source coordinate
	// (workgroup origin + a - 1), giving a one-texel border around the 8x8
	// block.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// Wait for every invocation of the workgroup to reach this point before
	// any of them reads the planes.
	barrier();
	// sP_R_C is the sample at row R, column C of the 3x3 window. s0_* first
	// holds plane 0 and s1_* holds plane 1; s0_* is reloaded with plane 2
	// further down.
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Plane 0 terms: (constant 4x4 matrix) * (window sample), one matrix per
	// accumulator per sample.
	r0 += M4(-2.098e-01, 1.106e-01, 2.119e-01, 1.484e-02, -1.480e-01, -6.131e-01, -1.209e-01, -2.944e-04, -4.172e-02, -3.993e-01, -3.176e-01, -1.164e-01, -4.167e-01, 1.690e-01, 3.069e-02, 2.045e-02) * s0_0_0;
	r1 += M4(1.552e-01, 3.723e-02, 5.796e-02, 4.457e-02, -2.542e-02, -1.374e-02, -4.647e-03, -5.185e-02, 2.801e-01, 1.553e-02, 8.617e-02, -2.357e-01, 5.160e-02, -1.656e-03, 1.041e-02, -4.410e-03) * s0_0_0;
	r2 += M4(1.354e-01, -8.784e-01, 2.529e-01, -7.119e-02, 1.195e-02, 2.001e-01, 1.703e-01, 2.076e-01, 3.557e-01, -1.000e+00, -1.000e+00, 2.419e-01, -3.164e-02, -2.268e-01, 3.650e-02, -1.053e-02) * s0_0_0;
	r0 += M4(-6.534e-01, -3.761e-01, 1.454e-02, 2.482e-03, 6.379e-02, -3.873e-01, -1.328e-01, 3.242e-03, -1.476e-01, 5.195e-01, -3.015e-01, 5.064e-02, -3.275e-01, -2.376e-02, -3.675e-05, -1.248e-02) * s0_0_1;
	r1 += M4(3.028e-01, -2.583e-01, 1.907e-02, -1.527e-01, -9.748e-02, 7.851e-02, -1.232e-02, -9.111e-02, -1.000e+00, -1.085e-01, -1.091e-01, -3.219e-01, 5.474e-02, -3.052e-02, 4.287e-02, 3.302e-02) * s0_0_1;
	r2 += M4(5.185e-01, 1.486e-01, 1.845e-01, -5.808e-02, -2.542e-01, 2.731e-01, -1.488e-01, 1.867e-03, 4.227e-01, 3.992e-01, -1.000e+00, -2.480e-01, 7.492e-02, -2.271e-02, 4.325e-02, 1.716e-02) * s0_0_1;
	r0 += M4(2.957e-02, -2.739e-01, 3.440e-02, -1.875e-03, 3.073e-02, 6.320e-02, 1.366e-01, -1.551e-02, 2.125e-01, -2.847e-01, 3.276e-01, -3.097e-02, -2.297e-02, -7.785e-02, 4.663e-03, 1.034e-02) * s0_0_2;
	r1 += M4(-1.322e-02, -5.193e-02, 3.601e-02, -1.473e-02, 2.153e-01, -9.068e-02, 7.739e-02, -3.955e-02, 3.419e-01, -9.054e-02, 6.048e-02, -1.310e-01, 1.552e-02, 7.111e-02, -1.734e-02, -1.966e-02) * s0_0_2;
	r2 += M4(4.876e-02, -1.723e-01, 7.386e-02, 3.634e-03, -1.089e-01, -7.840e-01, 2.289e-01, -2.786e-02, -1.922e-01, -4.042e-01, 2.943e-01, -1.482e-01, 6.342e-02, 1.105e-01, 1.400e-02, 1.400e-02) * s0_0_2;
	r0 += M4(-1.714e-01, -5.966e-01, -1.000e+00, -3.849e-02, -6.566e-01, 2.187e-01, -2.725e-01, -1.312e-01, 1.792e-01, 1.173e-01, -9.618e-02, -2.132e-01, -3.232e-01, 1.088e-01, 4.290e-01, 9.173e-02) * s0_1_0;
	r1 += M4(1.010e-02, 3.563e-03, 1.883e-02, 2.639e-01, -1.361e-01, 4.606e-02, -6.963e-03, 2.266e-02, 1.336e-02, 5.567e-02, 5.181e-02, -1.193e-01, 1.567e-01, -2.817e-02, -5.916e-02, 9.290e-02) * s0_1_0;
	r2 += M4(2.674e-01, -5.837e-02, -8.563e-01, 4.134e-01, -3.330e-01, -3.119e-01, 7.079e-03, 1.841e-01, 4.045e-01, -6.590e-02, -9.369e-02, -2.117e-01, -1.541e-01, 4.550e-01, 5.971e-01, -1.075e-01) * s0_1_0;
	r0 += M4(5.527e-01, 8.846e-01, 1.753e-03, 8.509e-02, -4.500e-01, 1.444e-01, 3.355e-01, 7.962e-02, -4.660e-02, 4.622e-01, -1.000e+00, -1.000e+00, -3.102e-01, -4.051e-01, 1.512e-01, 2.644e-02) * s0_1_1;
	r1 += M4(-1.000e+00, -4.274e-01, -1.177e-01, -5.478e-01, -2.039e-01, 2.612e-01, -1.092e-01, -6.752e-02, -1.000e+00, 1.683e-02, 6.908e-02, -1.148e-02, 9.317e-01, -1.601e-01, 1.183e-01, 1.092e-01) * s0_1_1;
	r2 += M4(2.014e-01, -1.338e-01, -1.000e+00, -8.228e-02, 1.197e-01, 2.178e-01, -5.318e-01, -4.049e-01, 3.408e-01, 4.453e-02, -1.704e-01, 3.470e-01, -3.149e-01, -1.000e+00, -1.304e-01, 1.861e-01) * s0_1_1;
	r0 += M4(6.000e-02, 2.162e-01, 3.064e-02, -2.508e-02, -5.294e-01, -6.590e-01, -1.914e-01, 3.295e-02, 1.680e-02, -4.489e-01, 4.325e-01, 1.369e-01, 5.058e-01, 5.737e-01, -1.894e-01, 2.851e-02) * s0_1_2;
	r1 += M4(2.951e-01, 9.945e-02, 9.714e-03, 5.658e-02, 5.176e-01, -4.606e-01, 1.931e-01, -5.990e-03, -9.268e-01, -1.845e-01, -2.050e-01, 1.679e-02, -5.526e-01, 3.268e-01, -7.895e-02, -4.821e-02) * s0_1_2;
	r2 += M4(-2.842e-01, 4.593e-02, 4.036e-03, 3.393e-02, 4.434e-01, -4.648e-01, 4.405e-01, 1.493e-01, -5.541e-01, 5.985e-02, 1.721e-01, -1.573e-02, -5.581e-01, 4.130e-01, -3.322e-01, 1.048e-01) * s0_1_2;
	r0 += M4(3.664e-02, -4.889e-01, -1.862e-02, 2.437e-02, 1.707e-01, 4.101e-01, 2.959e-01, -9.133e-02, 2.144e-02, 3.068e-02, -4.828e-02, -8.716e-02, -2.195e-01, -3.119e-01, 1.269e-01, 3.135e-02) * s0_2_0;
	r1 += M4(9.474e-02, 1.268e-02, -9.254e-02, 1.526e-01, -2.115e-01, -5.624e-03, 7.920e-02, 1.808e-01, -3.602e-02, -3.080e-02, 8.481e-02, 6.270e-03, 8.794e-02, -8.751e-02, 1.361e-01, 1.367e-01) * s0_2_0;
	r2 += M4(-1.634e-01, -4.282e-02, 2.450e-01, 3.431e-01, -1.335e-01, 2.062e-01, 2.170e-01, -1.800e-04, 1.178e-02, -8.150e-02, 1.331e-03, -2.861e-02, -3.448e-01, 1.487e-01, 6.867e-02, -7.054e-02) * s0_2_0;
	r0 += M4(1.381e-01, -5.785e-01, 2.484e-01, -1.116e-01, -6.993e-01, -4.425e-01, -4.489e-01, 9.507e-01, 1.178e-02, 1.164e-01, 9.484e-03, -3.221e-02, 4.640e-01, -2.825e-01, 3.259e-01, -5.555e-01) * s0_2_1;
	r1 += M4(-2.005e-01, -1.590e-02, 3.656e-02, 2.140e-01, -1.785e-01, 2.881e-01, -1.000e+00, -2.175e-02, -5.680e-03, 4.529e-02, 8.699e-03, -2.663e-02, 1.000e+00, -1.048e-01, 7.153e-01, 2.028e-01) * s0_2_1;
	r2 += M4(-5.587e-01, 9.332e-02, 3.429e-01, -4.967e-02, 1.564e-01, 6.568e-01, 6.258e-01, -1.862e-01, 2.819e-02, -9.394e-02, 9.968e-02, 4.774e-02, -1.209e-01, -6.171e-01, -2.422e-01, 9.143e-02) * s0_2_1;
	r0 += M4(-1.591e-03, -1.656e-01, 1.799e-02, 3.573e-02, 3.217e-01, 6.239e-01, 2.561e-01, -6.659e-02, -5.252e-03, -1.213e-01, 5.833e-02, 6.116e-02, -7.179e-02, -9.093e-02, -2.234e-01, -7.584e-03) * s0_2_2;
	r1 += M4(-2.825e-01, 2.442e-04, -4.750e-02, -4.105e-02, -5.617e-01, -7.371e-02, 7.222e-02, -2.163e-01, -8.786e-02, 8.544e-03, -7.400e-02, 5.233e-02, 1.580e-01, 1.606e-01, 1.614e-01, 1.440e-01) * s0_2_2;
	r2 += M4(-3.862e-01, 2.020e-01, 1.233e-01, -6.756e-03, -3.152e-01, -3.042e-01, -2.493e-01, 1.179e-01, -8.951e-02, 1.570e-01, 4.109e-02, -1.885e-02, 2.359e-01, 3.588e-01, 3.396e-01, 1.142e-02) * s0_2_2;
	// Plane 1 terms.
	r0 += M4(-8.400e-02, 2.572e-01, 1.645e-01, 7.185e-02, -9.359e-02, -3.663e-02, -2.774e-02, -1.322e-02, 2.825e-01, 6.463e-01, 1.897e-01, -1.945e-02, -3.602e-02, 3.101e-01, 3.379e-01, 4.191e-02) * s1_0_0;
	r1 += M4(-2.007e-01, 1.259e-03, 7.017e-02, -1.475e-01, -1.402e-02, 1.145e-02, -5.412e-04, -4.418e-02, 2.655e-02, -1.239e-02, 1.610e-02, 4.419e-02, 5.094e-01, 1.309e-02, 1.105e-01, 4.343e-01) * s1_0_0;
	r2 += M4(2.959e-01, 4.535e-01, -1.958e-01, -1.150e-01, 8.017e-02, 2.930e-02, -4.198e-02, 1.018e-01, 8.747e-02, -2.609e-01, -1.265e-01, -2.399e-01, -4.639e-01, -2.052e-01, 1.000e+00, -1.004e-01) * s1_0_0;
	r0 += M4(5.905e-01, -4.235e-02, 1.692e-01, -5.360e-02, 2.703e-01, 1.694e-01, 3.627e-02, 7.665e-02, -2.743e-01, 3.683e-01, 1.383e-01, 1.266e-02, 6.510e-01, 2.070e-01, -1.775e-01, 1.073e-01) * s1_0_1;
	r1 += M4(-3.015e-01, -6.966e-02, -1.018e-01, 4.270e-02, 2.008e-01, -2.213e-02, 3.686e-02, 7.709e-02, -4.166e-02, 1.090e-02, -1.432e-02, 9.455e-02, -7.004e-01, -3.143e-01, -2.292e-01, -3.605e-01) * s1_0_1;
	r2 += M4(-2.824e-01, -2.265e-01, -1.033e-01, 4.033e-01, -1.541e-01, 2.005e-01, -1.086e-02, -2.066e-01, 1.617e-01, -2.381e-01, 2.538e-01, -7.209e-04, -4.636e-04, -1.000e+00, -6.909e-01, -1.873e-01) * s1_0_1;
	r0 += M4(3.889e-02, -2.496e-01, -2.711e-02, -2.675e-03, -1.000e+00, 1.842e-01, -1.000e+00, -4.585e-02, -7.667e-02, 1.731e-01, -1.488e-01, 4.136e-03, 1.133e-01, 4.309e-01, -4.039e-01, 1.169e-01) * s1_0_2;
	r1 += M4(-6.091e-02, 7.603e-02, -4.551e-02, -4.749e-02, 1.550e-01, 2.059e-02, 3.369e-02, -7.957e-02, -5.548e-02, 2.939e-02, -6.664e-02, 6.670e-02, 3.117e-01, 4.173e-01, 2.399e-02, 7.073e-01) * s1_0_2;
	r2 += M4(-3.151e-02, -8.326e-02, 1.351e-01, 2.966e-02, 2.010e-01, 8.536e-03, -2.551e-02, -2.793e-01, 2.491e-01, 9.160e-01, -3.855e-01, -4.897e-03, -1.648e-02, 7.971e-01, -8.532e-01, 7.142e-02) * s1_0_2;
	r0 += M4(3.912e-01, 1.242e-01, 1.886e-01, 1.496e-01, 3.409e-02, -3.322e-02, -5.286e-02, -1.362e-02, -3.368e-01, -8.143e-02, 1.324e-01, 1.124e-01, -2.968e-01, 1.166e-02, 1.831e-01, 1.279e-02) * s1_1_0;
	r1 += M4(1.653e-01, 1.309e-02, 1.685e-01, -2.245e-02, 1.976e-02, 8.515e-03, -2.704e-02, -2.238e-02, -1.291e-01, -2.572e-02, -7.126e-02, -1.304e-01, -5.554e-01, -6.955e-02, -6.072e-02, -2.686e-01) * s1_1_0;
	r2 += M4(-1.761e-01, 6.158e-02, 1.302e-01, -5.932e-01, 1.069e-01, -5.456e-02, -2.498e-03, 1.553e-01, 6.939e-02, -4.282e-03, 2.144e-01, -3.321e-01, 3.648e-02, -2.173e-01, -3.916e-01, -4.481e-01) * s1_1_0;
	r0 += M4(-2.502e-01, -5.978e-01, -5.887e-01, -1.814e-01, -1.040e-01, -4.387e-01, -1.291e-01, 1.627e-01, -5.379e-01, 3.539e-01, -1.465e-01, -2.298e-02, -1.000e+00, -3.000e-01, 3.991e-01, -6.036e-01) * s1_1_1;
	r1 += M4(4.047e-01, 6.488e-01, 6.640e-02, 6.309e-01, -1.717e-01, -2.669e-02, -2.475e-01, -1.036e-01, 2.872e-01, 1.052e-01, 7.317e-02, 9.705e-02, 2.717e-01, 3.232e-01, 9.824e-01, 5.618e-01) * s1_1_1;
	r2 += M4(-1.000e+00, 1.528e-01, 4.842e-01, -3.799e-01, -2.425e-02, -5.674e-01, -3.530e-01, 7.895e-02, -3.938e-01, -5.834e-02, 1.976e-01, 5.176e-01, -6.376e-01, 4.396e-02, 3.682e-01, 7.689e-01) * s1_1_1;
	r0 += M4(2.330e-02, 1.183e-01, 1.776e-01, -1.992e-02, 3.124e-01, 4.817e-04, -2.086e-02, -5.396e-02, -7.754e-02, -9.291e-02, 2.015e-01, -5.914e-03, -4.000e-02, -1.000e+00, -4.149e-01, -7.061e-02) * s1_1_2;
	r1 += M4(-1.855e-01, 8.157e-02, 1.809e-02, -9.623e-02, -1.000e+00, -1.215e-01, -1.000e+00, 1.934e-01, -4.037e-01, 7.482e-02, -1.556e-01, 7.712e-03, 7.090e-01, -5.089e-01, -2.903e-01, -1.342e-01) * s1_1_2;
	r2 += M4(-2.779e-04, -3.559e-01, -1.017e-02, 1.042e-03, 1.584e-01, -4.874e-01, -1.000e+00, -1.565e-01, 1.322e-01, 1.089e-01, -3.490e-01, -1.237e-02, 6.081e-01, -1.636e-01, 1.564e-01, -1.869e-01) * s1_1_2;
	r0 += M4(1.604e-01, -6.101e-01, -3.496e-01, 4.032e-02, 4.945e-02, -3.237e-03, -1.511e-02, -3.007e-02, -4.495e-01, -1.994e-01, 1.261e-03, 6.627e-02, 4.696e-01, 1.291e-01, 1.513e-01, -1.593e-02) * s1_2_0;
	r1 += M4(1.434e-01, 1.339e-02, 6.438e-04, 7.262e-02, 8.119e-03, 3.401e-03, -6.500e-03, 1.482e-02, 8.303e-01, 9.435e-02, 8.714e-02, 1.270e-01, -6.029e-02, 2.988e-02, -1.115e-01, -2.798e-01) * s1_2_0;
	r2 += M4(8.699e-02, -2.020e-01, -1.740e-01, -2.468e-01, 3.577e-02, -1.957e-02, 7.283e-03, 1.750e-02, -7.011e-02, 2.625e-01, -4.867e-01, 1.926e-01, 1.727e-01, 1.742e-01, 9.442e-02, 2.935e-01) * s1_2_0;
	r0 += M4(-2.823e-01, 2.198e-01, -1.197e-01, 6.760e-02, 4.213e-02, -9.985e-02, 4.175e-02, 2.092e-02, 4.206e-01, 8.053e-02, 6.850e-01, 1.247e-01, 9.447e-02, -1.590e-01, 7.935e-02, 4.516e-01) * s1_2_1;
	r1 += M4(-1.892e-01, -5.053e-02, -7.117e-02, -1.985e-01, -2.300e-01, 1.874e-02, -8.708e-02, 4.849e-02, 2.439e-01, -1.163e-01, 1.116e-01, 3.428e-02, -2.822e-01, -9.028e-02, -2.470e-01, -1.814e-01) * s1_2_1;
	r2 += M4(4.908e-02, -8.549e-02, -1.212e-01, 1.384e-01, -1.150e-01, 1.292e-01, 1.406e-03, -3.821e-02, -2.515e-01, 1.404e-01, -2.627e-01, -1.026e-01, -7.995e-03, 3.301e-01, 1.889e-02, -3.510e-01) * s1_2_1;
	r0 += M4(-4.157e-03, 9.393e-02, 4.102e-02, 1.292e-02, 1.127e-01, -2.353e-01, 2.399e-02, 4.703e-03, -1.616e-01, 1.450e-01, -3.454e-01, 3.486e-02, -3.057e-01, 3.347e-01, 3.347e-02, -1.229e-01) * s1_2_2;
	r1 += M4(-3.221e-02, -1.066e-01, -1.779e-02, 1.004e-03, 4.458e-01, 2.160e-03, 7.711e-02, 6.519e-02, 5.059e-01, 5.669e-03, -9.529e-02, 1.491e-01, -1.339e-01, 1.013e-01, -1.073e-02, -2.425e-01) * s1_2_2;
	r2 += M4(-1.504e-02, -1.128e-01, -2.194e-02, 9.703e-02, -2.917e-01, 1.881e-02, 1.188e-01, 6.815e-02, 4.443e-01, 2.005e-01, -2.557e-01, -5.482e-02, 2.803e-02, -7.970e-02, 1.756e-01, -1.289e-01) * s1_2_2;
	// Plane 2 terms: the s0_* variables are reloaded from G[2] now that the
	// plane 0 products have been accumulated.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	r0 += M4(-5.666e-02, -4.242e-02, -3.760e-03, -1.724e-04, 1.861e-01, 1.299e-01, -4.120e-02, 3.261e-03, -4.438e-02, 2.958e-01, 5.478e-02, 3.455e-02, 3.855e-01, -2.061e-01, -3.626e-01, -2.863e-02) * s0_0_0;
	r1 += M4(4.253e-02, 1.593e-02, -4.433e-03, 1.663e-02, 2.362e-02, -4.390e-02, 2.508e-02, 4.657e-03, -9.591e-02, 1.420e-01, -9.565e-02, -4.762e-01, -2.206e-01, 2.095e-02, -2.806e-02, 1.249e-01) * s0_0_0;
	r2 += M4(5.921e-02, -3.380e-02, 8.555e-02, 1.868e-02, -2.755e-02, 1.405e-01, -1.824e-01, 2.036e-04, -6.267e-01, 4.999e-01, 8.886e-01, 1.510e-01, 8.263e-02, 1.317e-01, 1.324e-01, 1.253e-01) * s0_0_0;
	r0 += M4(-1.528e-01, -2.506e-01, 1.174e-01, 7.110e-03, 4.063e-01, 7.093e-01, -3.761e-01, 1.825e-02, 3.083e-01, 1.409e-01, 5.059e-01, 6.276e-02, 4.179e-01, -9.430e-02, -3.996e-01, -7.863e-02) * s0_0_1;
	r1 += M4(1.473e-02, 1.935e-03, -1.751e-02, -3.706e-02, -1.663e-01, -6.763e-02, 7.139e-02, 1.193e-01, 1.379e-01, -4.468e-01, 1.589e-01, -1.982e-01, 6.152e-02, -4.326e-02, 5.682e-02, -8.386e-02) * s0_0_1;
	r2 += M4(4.000e-03, 1.828e-01, 8.007e-02, -3.085e-02, 4.342e-02, -1.744e-01, 1.691e-01, -9.784e-02, -3.788e-01, 2.468e-01, -2.624e-01, -3.165e-01, 9.743e-02, -7.667e-02, 4.289e-01, -1.268e-01) * s0_0_1;
	r0 += M4(-1.301e-02, -2.244e-01, 3.398e-02, 6.676e-03, 1.156e-01, 4.872e-01, -6.079e-03, 7.192e-02, 1.659e-01, 5.332e-01, 8.473e-02, -5.051e-03, -2.513e-01, 5.785e-01, -1.430e-01, -1.961e-03) * s0_0_2;
	r1 += M4(5.120e-02, 2.301e-02, 1.837e-02, -5.961e-03, -2.043e-01, 4.051e-02, -7.627e-03, 6.587e-02, -9.115e-02, -1.378e-02, -7.783e-03, 3.907e-02, 6.477e-02, 1.722e-02, -9.389e-03, 7.852e-02) * s0_0_2;
	r2 += M4(4.411e-02, 3.078e-02, 6.450e-02, -2.014e-02, -1.400e-01, 3.173e-01, -2.808e-03, 5.585e-02, -2.661e-01, 2.536e-01, -5.751e-02, -7.377e-02, 1.055e-01, 8.455e-02, -6.779e-02, -2.997e-02) * s0_0_2;
	r0 += M4(-1.352e-01, 9.250e-02, -9.399e-02, 5.436e-04, 1.935e-01, -1.499e-01, 4.931e-01, 1.886e-02, -2.183e-01, -7.815e-02, 9.593e-01, -7.409e-02, 1.442e-01, 3.904e-01, -1.502e-01, -6.166e-02) * s0_1_0;
	r1 += M4(6.226e-03, -9.141e-04, 9.388e-03, 3.908e-04, -1.617e-01, 4.619e-02, -8.112e-02, 2.351e-02, 5.628e-01, 6.729e-02, -5.672e-02, -1.218e-03, 3.680e-04, -4.964e-02, 1.604e-02, 6.716e-02) * s0_1_0;
	r2 += M4(2.634e-02, -2.568e-01, -4.668e-02, 2.887e-02, 1.360e-04, 5.294e-01, 3.504e-01, -1.227e-01, 4.142e-01, -2.801e-02, -1.000e+00, -5.708e-01, -1.075e-01, 4.410e-01, 1.000e+00, -2.634e-01) * s0_1_0;
	r0 += M4(-2.337e-01, -7.839e-01, -7.730e-02, -5.641e-02, -1.952e-01, 4.589e-01, 3.193e-03, 7.793e-02, -3.444e-01, -2.036e-01, 3.935e-01, 5.948e-02, -7.492e-02, -5.435e-01, 7.428e-01, 8.256e-02) * s0_1_1;
	r1 += M4(5.954e-02, -5.179e-02, -1.260e-01, 2.590e-01, -5.575e-02, -8.316e-02, 1.890e-01, 2.750e-01, 8.003e-02, -3.662e-01, 2.826e-01, -4.158e-01, -1.000e+00, -6.421e-02, 2.670e-01, -4.709e-01) * s0_1_1;
	r2 += M4(-6.885e-02, -1.227e-02, -3.862e-01, 4.948e-02, 5.840e-02, -7.344e-01, -3.654e-01, -7.076e-01, 1.000e+00, 2.128e-01, 6.738e-01, 1.764e-01, -5.841e-01, -7.028e-01, -1.000e+00, 2.259e-01) * s0_1_1;
	r0 += M4(1.168e-01, -7.293e-01, -2.115e-01, -3.967e-03, -7.397e-02, -3.166e-01, 2.214e-02, -1.505e-01, -1.766e-01, -5.101e-01, -1.791e-01, 5.195e-02, -1.415e-01, 5.920e-02, -8.982e-02, -1.202e-02) * s0_1_2;
	r1 += M4(3.256e-01, 3.232e-01, 6.011e-02, -1.895e-01, -3.786e-01, 5.724e-01, -1.915e-01, 2.186e-01, 4.162e-01, -1.629e-01, 7.904e-02, 1.218e-01, -1.000e+00, 3.462e-02, -9.283e-02, -2.934e-02) * s0_1_2;
	r2 += M4(2.168e-02, 6.909e-02, 4.553e-02, -1.329e-01, -3.943e-02, -2.674e-01, -8.328e-02, 3.610e-02, 1.183e-01, 4.157e-01, 2.159e-01, 6.256e-03, 4.384e-01, -6.567e-02, -1.843e-01, -5.908e-02) * s0_1_2;
	r0 += M4(-1.072e-01, 8.430e-03, -9.861e-02, 2.038e-03, 1.731e-01, -3.392e-02, 8.459e-03, 1.221e-02, 1.680e-01, 1.000e+00, 3.195e-01, -1.241e-01, -2.264e-01, 3.042e-01, -3.319e-02, -3.043e-02) * s0_2_0;
	r1 += M4(1.839e-01, 3.031e-02, 3.351e-02, 6.825e-02, -1.344e-01, -3.919e-02, 2.746e-02, -5.827e-02, -3.807e-01, 3.406e-02, 4.573e-02, -1.590e-01, 1.963e-01, 6.695e-03, 5.670e-02, -2.403e-02) * s0_2_0;
	r2 += M4(7.925e-02, 3.060e-02, -8.618e-03, 1.139e-01, -9.687e-02, -3.092e-01, -3.770e-01, -8.989e-02, 3.642e-01, 2.167e-01, 1.507e-01, -2.012e-01, 1.705e-01, -1.041e-01, 1.464e-01, 2.662e-03) * s0_2_0;
	r0 += M4(2.094e-01, 1.225e-01, -6.810e-01, -1.101e-01, -2.304e-01, -5.918e-01, 5.710e-01, 1.078e-01, 1.628e-01, 5.449e-01, -2.348e-01, 8.185e-03, -3.077e-01, 3.549e-01, 1.420e-01, -9.173e-03) * s0_2_1;
	r1 += M4(1.111e-01, 6.613e-02, -9.460e-02, -1.643e-01, -1.051e-01, -1.646e-01, -9.992e-02, -8.536e-02, -3.864e-01, -6.314e-02, -2.519e-01, -1.472e-01, -5.798e-01, -6.424e-02, -1.300e-01, -1.665e-01) * s0_2_1;
	r2 += M4(5.187e-02, -2.512e-01, -1.000e+00, -1.000e+00, -6.265e-02, 1.916e-01, 1.225e-01, -3.195e-01, 4.737e-01, -1.917e-01, -7.417e-01, -1.720e-01, 1.498e-01, 3.405e-03, -2.251e-01, -1.028e-01) * s0_2_1;
	r0 += M4(-8.978e-02, 2.320e-01, -1.000e+00, 3.593e-02, 4.464e-02, -2.208e-01, 7.185e-01, -1.431e-02, 4.580e-02, 3.554e-02, 7.048e-03, -3.044e-02, 1.117e-01, 2.567e-01, 1.112e-01, -7.046e-02) * s0_2_2;
	r1 += M4(-9.392e-01, 3.491e-02, -9.144e-03, -1.410e-01, 5.859e-01, 2.708e-01, -5.922e-02, 3.690e-01, 3.609e-02, 1.126e-01, 7.024e-02, 1.669e-02, -1.201e-01, -1.900e-02, 7.566e-02, 1.020e-01) * s0_2_2;
	r2 += M4(-1.000e+00, 1.645e-01, 1.675e-02, 1.459e-01, -3.278e-01, -1.472e-01, 3.917e-01, 7.404e-02, -3.662e-02, -2.992e-02, -5.658e-02, -9.393e-02, 1.930e-01, -2.340e-01, -2.212e-01, 2.113e-03) * s0_2_2;
	// Clamp every component of each accumulator to [0, 1] and store it as a
	// 32-bit vec4 at output x offsets 0, 1 and 2. No constant offset is added
	// in this pass.
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_3x12_SOFT] -conv2
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND conv1
//!BIND LUMA
//!SAVE conv2
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Precision selection: request the float16 arithmetic extension and, when its
// macro is defined, alias V4/M4/F to 16-bit types; otherwise alias them to the
// 32-bit vec4/mat4/float.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): the three texels of `conv1` that belong to source position
// pos + (x, y). The position is clamped to [0, sz] first; its x is then
// multiplied by 3 and offset by 0, 1 or 2. The fetched texel is multiplied by
// conv1_mul and converted to V4. `pos` and `sz` must be in scope where the
// macros are expanded.
#define l0(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup-shared cache: three 10x10 planes of V4, one per macro above.
shared V4 G[3][10][10];
void hook() {
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(3, 1);
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	barrier();
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	r0 += M4(-2.308e-02, 8.627e-03, -1.876e-02, -3.111e-02, 3.887e-03, 5.485e-02, -4.837e-02, 1.003e-01, -1.310e-02, 1.211e-02, -4.575e-02, 1.290e-02, -5.230e-02, -8.915e-02, 8.037e-02, -2.537e-02) * s0_0_0;
	r1 += M4(4.372e-02, -1.273e-02, 1.674e-01, -7.509e-02, 6.249e-02, 5.548e-02, -1.048e-01, -6.519e-02, -7.734e-02, -6.717e-02, 9.190e-02, 2.973e-02, -1.534e-01, -2.564e-02, 7.954e-02, -1.562e-02) * s0_0_0;
	r2 += M4(3.165e-02, -3.942e-02, 7.107e-03, -3.061e-02, 3.801e-02, -1.865e-02, 5.315e-02, 5.246e-02, -4.759e-02, 3.190e-02, -4.745e-02, -3.791e-02, -7.057e-02, 3.674e-02, 7.412e-03, 2.688e-02) * s0_0_0;
	r0 += M4(8.916e-03, 1.695e-02, -3.104e-02, -3.820e-02, 8.181e-02, 3.915e-02, 1.840e-03, 2.140e-01, 4.362e-02, -3.250e-01, -1.342e-02, 1.928e-01, 2.033e-02, -2.585e-01, 2.160e-02, -4.593e-01) * s0_0_1;
	r1 += M4(-1.645e-01, 7.431e-02, -1.063e-01, -6.414e-03, 2.225e-01, 1.639e-02, 1.482e-01, 2.401e-01, -1.487e-02, 4.908e-02, -2.078e-01, -1.463e-01, -5.021e-01, -1.929e-01, 7.162e-02, -2.480e-01) * s0_0_1;
	r2 += M4(-4.743e-02, 4.990e-02, -2.505e-02, 5.780e-03, -2.280e-01, -6.269e-02, 1.177e-01, -3.377e-02, -2.912e-01, -2.710e-02, 1.125e-01, 1.316e-01, 5.293e-01, -1.424e-01, -2.861e-01, -2.666e-01) * s0_0_1;
	r0 += M4(2.799e-02, -7.550e-02, -9.620e-02, -1.067e-01, 1.574e-02, -9.780e-02, 4.888e-02, 1.037e-01, 1.911e-02, 2.213e-03, -3.296e-03, 5.637e-02, -1.867e-02, 6.716e-02, -5.105e-02, -1.632e-01) * s0_0_2;
	r1 += M4(-6.461e-03, 9.208e-02, 4.713e-03, -8.100e-02, 2.699e-02, -8.120e-02, 9.926e-02, 7.510e-02, -6.576e-02, 1.479e-02, -1.443e-01, -4.930e-02, 1.529e-01, 3.555e-02, -9.217e-02, 1.005e-01) * s0_0_2;
	r2 += M4(-5.885e-01, -1.393e-01, -6.462e-02, 1.547e-01, 3.407e-01, -1.347e-02, -3.793e-02, 2.764e-02, 1.127e-01, -2.973e-03, 1.819e-03, 2.023e-02, -6.185e-01, 4.636e-02, -3.760e-02, 2.754e-01) * s0_0_2;
	r0 += M4(5.088e-02, 1.362e-01, -3.060e-01, -9.701e-02, 3.428e-02, -3.939e-02, 3.038e-02, 1.339e-01, -8.318e-04, -4.656e-02, -3.527e-01, 4.172e-02, -3.361e-02, -3.259e-02, -1.121e-01, -3.815e-02) * s0_1_0;
	r1 += M4(7.432e-02, 7.490e-02, 2.872e-02, -1.341e-01, 8.267e-02, -1.210e-01, -4.666e-01, 9.526e-02, 2.780e-03, 7.422e-02, -7.677e-02, 5.249e-02, -8.082e-02, -1.528e-01, 8.717e-02, 2.326e-02) * s0_1_0;
	r2 += M4(1.017e-01, -3.923e-02, 4.076e-02, -1.753e-01, 1.553e-01, 2.457e-02, 3.181e-02, -5.667e-02, -3.143e-02, 2.896e-02, 1.344e-01, -8.834e-02, 4.444e-02, 1.227e-02, -4.529e-02, 5.598e-03) * s0_1_0;
	r0 += M4(-4.131e-02, 4.698e-01, -9.891e-02, 1.293e-01, 5.479e-02, -1.307e-01, 1.261e-01, 2.206e-01, -2.406e-01, -5.476e-02, -1.171e-01, 3.264e-01, 8.300e-01, -2.675e-02, 1.311e-01, 8.317e-02) * s0_1_1;
	r1 += M4(5.215e-01, 8.750e-02, 8.055e-02, -1.802e-01, -3.092e-01, -1.844e-01, -2.840e-01, 4.955e-02, -1.529e-01, 4.476e-02, -6.357e-01, 5.183e-02, -1.602e-01, 2.930e-01, -1.502e-01, 2.204e-02) * s0_1_1;
	r2 += M4(3.096e-01, -4.031e-03, 7.253e-02, 1.724e-01, 3.132e-02, -6.539e-02, -2.721e-01, 4.023e-03, -1.502e-01, -1.407e-01, 1.234e-01, 2.701e-01, -1.128e-01, -8.001e-02, 6.940e-01, -1.143e-02) * s0_1_1;
	r0 += M4(8.976e-02, 4.464e-02, -4.202e-02, -1.077e-01, -5.752e-02, 3.442e-02, 1.846e-02, 6.191e-02, -2.837e-02, 4.506e-02, 7.993e-02, 2.672e-01, -2.403e-02, 8.573e-02, -1.607e-02, -1.298e-01) * s0_1_2;
	r1 += M4(1.911e-01, 3.075e-01, 1.706e-01, -1.380e-01, 8.967e-02, -5.111e-01, 6.003e-02, -9.101e-03, -3.321e-02, 1.174e-01, -3.594e-02, -1.163e-02, 5.997e-02, -2.103e-01, 2.862e-02, 2.119e-02) * s0_1_2;
	r2 += M4(7.216e-01, 8.470e-02, 3.161e-01, 4.241e-02, -1.449e-01, -2.867e-03, -1.135e-01, -1.673e-02, 1.478e-01, -4.787e-01, 9.935e-02, -1.045e-01, 1.926e-02, -3.486e-01, -6.940e-02, -1.670e-01) * s0_1_2;
	r0 += M4(1.104e-01, -5.745e-02, -1.832e-01, -1.194e-01, -5.226e-02, 6.558e-03, -2.361e-02, 9.985e-02, -4.732e-02, 1.342e-02, -3.061e-02, 4.440e-02, 2.144e-02, 2.358e-03, 5.466e-03, -2.005e-02) * s0_2_0;
	r1 += M4(8.823e-02, 2.165e-01, 1.887e-01, -1.176e-01, 3.864e-02, -1.661e-01, -7.766e-02, -1.308e-02, 8.378e-03, 3.062e-03, 2.857e-02, 1.332e-02, -2.785e-02, -2.028e-02, 6.389e-02, -9.019e-03) * s0_2_0;
	r2 += M4(6.125e-02, 1.139e-01, 1.328e-01, -8.550e-02, 8.000e-03, -4.355e-02, 5.229e-02, 1.366e-03, 1.039e-02, 6.964e-02, 2.468e-02, 2.796e-02, 2.494e-02, 4.284e-03, 9.045e-02, -5.244e-02) * s0_2_0;
	r0 += M4(-9.705e-02, 4.094e-02, -1.293e-01, -3.369e-01, -3.828e-01, 1.941e-02, -3.971e-02, -6.478e-02, -2.339e-01, 6.777e-03, -1.049e-01, 1.240e-01, -2.652e-01, 2.045e-02, 2.010e-02, -1.865e-02) * s0_2_1;
	r1 += M4(1.017e-01, -3.235e-02, -1.366e-01, 4.273e-02, 1.208e-01, -4.072e-01, 2.257e-01, -1.116e-01, 5.435e-03, -1.386e-02, 1.999e-01, 5.705e-03, 2.613e-02, -1.236e-01, -1.532e-02, -2.909e-02) * s0_2_1;
	r2 += M4(4.362e-02, -8.306e-02, -3.496e-02, 2.196e-01, 1.473e-01, 1.488e-01, -1.660e-01, -1.413e-01, -1.277e-03, 3.080e-02, -1.583e-02, 8.057e-02, 5.811e-03, 3.173e-02, -1.034e-01, 5.039e-02) * s0_2_1;
	r0 += M4(1.166e-01, 3.233e-03, -7.317e-02, -7.807e-02, 9.155e-02, 5.979e-02, 2.216e-02, 6.361e-02, 1.670e-02, 2.289e-02, -3.452e-03, 6.689e-02, -2.706e-02, 6.633e-05, -4.114e-03, -5.878e-02) * s0_2_2;
	r1 += M4(3.358e-02, 1.184e-01, 1.565e-01, -9.053e-02, 1.218e-01, -2.929e-01, 3.570e-01, 3.999e-02, 2.220e-02, -2.002e-02, 1.917e-01, 3.418e-02, 6.055e-03, -1.549e-02, -4.772e-02, -5.983e-03) * s0_2_2;
	r2 += M4(-9.799e-02, -1.524e-02, 1.536e-01, 3.387e-01, 6.622e-02, -3.252e-01, 6.481e-02, -2.412e-01, -1.990e-02, -1.685e-01, -1.549e-02, -4.033e-02, -4.046e-03, -4.515e-02, -3.395e-02, -3.383e-02) * s0_2_2;
	r0 += M4(-4.574e-02, -1.474e-01, -9.247e-02, 4.897e-02, -6.088e-02, -1.657e-01, 6.793e-02, 6.954e-02, 5.292e-02, 5.029e-02, 1.002e-02, -3.075e-02, 6.182e-03, 8.574e-02, -2.578e-02, 8.531e-02) * s1_0_0;
	r1 += M4(3.853e-02, 6.099e-03, 8.827e-04, -4.779e-02, -5.788e-02, -9.818e-02, -4.873e-02, 6.659e-02, -4.328e-02, -3.215e-03, -4.921e-02, 1.815e-02, 3.308e-02, 7.698e-02, 9.366e-02, -4.604e-02) * s1_0_0;
	r2 += M4(-3.627e-02, -1.524e-02, 7.354e-02, 1.685e-02, -1.028e-01, 5.336e-02, 3.299e-02, 1.985e-02, 7.638e-02, -9.827e-02, -1.129e-01, 9.154e-03, 9.462e-03, 1.860e-02, 6.470e-02, 1.868e-04) * s1_0_0;
	r0 += M4(-1.375e-02, -4.960e-02, -4.076e-02, 1.028e-01, 5.504e-03, 4.309e-02, -5.383e-02, -2.761e-02, -8.714e-02, 3.000e-01, -9.311e-03, 1.102e-01, 2.209e-03, 2.447e-01, 3.387e-02, 2.250e-01) * s1_0_1;
	r1 += M4(2.811e-01, -7.498e-02, -1.249e-01, -1.004e-01, 3.986e-02, 1.024e-01, -8.861e-03, -3.903e-02, -5.379e-01, 1.908e-01, -4.964e-02, 4.487e-01, 1.362e-01, 1.117e-01, 1.181e-01, 1.130e-01) * s1_0_1;
	r2 += M4(3.045e-02, -2.602e-02, 5.039e-02, -1.334e-01, -6.809e-01, -1.390e-01, -1.607e-02, -7.810e-02, -5.805e-02, 1.527e-01, 7.123e-03, 3.661e-02, 3.387e-01, -4.980e-02, 6.210e-02, 6.710e-03) * s1_0_1;
	r0 += M4(1.813e-02, -4.827e-02, 2.072e-02, 1.606e-01, -3.955e-03, -5.599e-02, 2.032e-03, 1.827e-02, 1.560e-02, -1.021e-01, -2.402e-02, -8.116e-02, 1.925e-03, 1.769e-02, 3.005e-02, 1.101e-01) * s1_0_2;
	r1 += M4(-7.990e-02, -4.152e-02, -5.838e-02, 6.609e-03, -3.263e-02, 3.990e-02, -1.505e-02, -1.725e-02, 2.948e-02, 1.649e-02, 1.605e-01, 3.137e-02, -6.146e-03, 1.367e-02, 7.333e-02, 1.869e-02) * s1_0_2;
	r2 += M4(1.293e-01, 1.628e-02, -5.476e-03, 5.260e-02, -3.454e-03, 3.031e-02, -1.148e-02, 7.532e-02, -3.585e-01, -2.311e-01, -2.790e-02, -1.950e-01, 6.313e-02, 6.017e-02, 7.195e-02, -3.949e-02) * s1_0_2;
	r0 += M4(6.174e-03, -7.091e-02, -9.371e-02, 5.123e-02, -6.846e-02, 1.701e-01, 4.567e-01, 1.116e-01, 8.851e-02, 7.560e-03, -1.281e-02, -7.894e-02, 1.313e-02, -3.879e-02, 9.565e-02, 2.097e-01) * s1_1_0;
	r1 += M4(8.474e-02, -8.943e-02, -2.684e-02, 3.727e-02, -3.199e-01, 1.122e-01, -5.402e-03, -3.212e-02, -1.194e-01, 1.486e-01, -2.649e-02, -1.052e-01, 9.900e-02, 2.808e-02, 1.187e-01, 3.655e-02) * s1_1_0;
	r2 += M4(2.907e-02, -8.604e-02, 4.768e-02, 2.089e-02, 3.468e-04, -8.861e-02, -1.344e-01, 4.900e-02, 6.694e-02, -6.911e-02, 1.265e-01, -2.787e-02, -9.585e-02, -6.997e-02, 8.144e-02, 7.887e-02) * s1_1_0;
	r0 += M4(8.807e-02, -5.390e-02, -1.005e-01, 3.365e-01, 2.107e-03, 2.744e-01, -5.179e-02, -3.397e-01, -1.000e+00, 1.180e-01, 6.085e-03, -4.134e-01, 1.165e-02, -1.887e-01, 5.368e-02, 3.882e-01) * s1_1_1;
	r1 += M4(1.149e-01, 1.388e-01, 1.711e-01, -3.850e-02, 2.365e-01, -2.717e-01, 1.928e-01, -7.034e-02, 1.145e-01, -3.806e-01, 3.048e-01, -9.635e-03, 4.592e-02, 1.078e-01, -6.581e-01, 3.697e-02) * s1_1_1;
	r2 += M4(1.638e-01, 4.124e-04, -7.136e-02, 6.287e-03, -1.000e+00, -1.000e+00, -1.502e-01, -1.983e-01, 1.155e-01, -9.395e-02, -2.444e-01, -6.950e-02, -2.738e-01, -2.138e-01, 8.466e-02, 1.994e-02) * s1_1_1;
	r0 += M4(-9.252e-02, 3.528e-02, 8.927e-03, 2.056e-01, 2.609e-02, -2.240e-02, 9.537e-03, 6.257e-02, -3.767e-02, -1.289e-02, -1.530e-02, -1.141e-01, -5.998e-02, -7.041e-02, -1.061e-02, 1.033e-01) * s1_1_2;
	r1 += M4(-4.977e-03, -6.321e-02, -6.206e-02, 9.460e-03, 4.755e-02, 1.776e-02, -1.814e-02, 2.996e-02, -8.718e-02, -5.672e-02, -1.996e-02, 3.721e-02, 6.826e-02, 9.240e-02, -3.248e-01, -6.630e-02) * s1_1_2;
	r2 += M4(2.349e-01, 1.636e-01, -7.437e-02, -1.117e-01, -6.246e-02, 5.089e-02, 7.254e-02, 6.234e-02, -7.461e-02, -6.319e-02, -6.544e-02, 1.329e-02, 1.838e-01, 3.845e-01, 3.136e-02, 1.427e-02) * s1_1_2;
	r0 += M4(8.488e-02, 1.038e-02, 7.143e-02, 1.348e-01, -4.140e-01, -1.247e-03, 6.997e-02, -7.591e-02, -2.777e-02, -8.416e-03, 1.085e-03, -4.717e-02, 1.844e-02, 1.413e-02, 3.740e-02, 1.066e-01) * s1_2_0;
	r1 += M4(1.630e-02, -8.288e-04, -4.635e-03, 3.143e-03, -1.233e-04, 6.242e-02, -1.240e-01, -3.980e-02, 1.665e-02, -5.244e-02, -7.049e-02, 1.503e-03, -1.728e-03, 3.744e-02, -1.362e-01, -1.121e-02) * s1_2_0;
	r2 += M4(-2.559e-02, -2.599e-03, 1.035e-02, 9.188e-03, 7.004e-02, -2.991e-03, -7.866e-02, -4.241e-02, 1.818e-02, 1.824e-02, 1.120e-02, 2.921e-02, -2.791e-02, -4.498e-02, 2.268e-02, -2.946e-02) * s1_2_0;
	r0 += M4(1.115e-01, -6.985e-03, 2.148e-02, 2.106e-01, 1.073e-02, 5.676e-03, 7.733e-02, -1.026e-01, 2.825e-02, 9.037e-03, 5.345e-02, 6.180e-02, -4.701e-02, 2.764e-02, -4.061e-02, -4.546e-02) * s1_2_1;
	r1 += M4(-1.651e-02, -8.000e-02, -1.092e-02, -9.732e-03, -2.449e-02, -3.420e-02, -2.643e-01, 2.263e-02, 1.872e-02, 3.147e-03, -7.996e-01, 9.451e-03, 9.380e-02, -1.022e-01, -5.999e-01, -1.047e-02) * s1_2_1;
	r2 += M4(-5.563e-02, -1.387e-01, -1.084e-01, 8.775e-03, -4.131e-02, -1.486e-01, -5.619e-02, 1.064e-02, -1.325e-02, 7.729e-03, 4.652e-02, 5.710e-03, 3.785e-02, -4.086e-02, -1.031e-01, 2.426e-02) * s1_2_1;
	r0 += M4(5.740e-02, -4.341e-02, 4.822e-02, 1.716e-01, -1.011e-02, -2.733e-03, -4.930e-03, -1.748e-02, -5.932e-02, 2.292e-02, 2.917e-02, 1.924e-02, 7.637e-02, -4.146e-02, -2.410e-02, 5.345e-02) * s1_2_2;
	r1 += M4(1.306e-02, -2.928e-02, 1.777e-01, -2.367e-02, 3.614e-03, -2.357e-02, 5.674e-02, 6.725e-03, -1.136e-02, -4.513e-02, -7.599e-02, 2.624e-03, 5.842e-03, 1.251e-01, -7.784e-02, -9.287e-03) * s1_2_2;
	r2 += M4(1.068e-01, -4.240e-02, -1.685e-03, -7.593e-02, 5.191e-03, -6.350e-02, -3.912e-02, 1.457e-01, 2.692e-03, -4.927e-02, -5.822e-03, -3.778e-03, 3.979e-02, 2.216e-01, 8.972e-02, 2.372e-03) * s1_2_2;
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	r0 += M4(3.403e-02, 4.444e-02, -1.291e-01, -1.447e-03, -1.593e-02, -1.999e-01, 3.669e-02, 5.018e-02, 8.121e-03, 7.078e-02, 4.054e-02, 5.194e-02, 1.889e-02, 2.111e-03, 5.103e-02, -7.100e-02) * s0_0_0;
	r1 += M4(1.094e-02, 1.008e-01, 1.921e-03, -1.695e-01, -6.040e-03, -1.184e-01, 6.073e-02, -4.630e-02, -3.934e-02, 6.528e-02, 8.730e-02, -4.635e-02, -3.932e-02, 1.450e-02, -3.478e-02, 4.457e-02) * s0_0_0;
	r2 += M4(-3.330e-02, 8.214e-03, 8.636e-03, 3.255e-02, 1.184e-01, 5.355e-02, -9.582e-02, 1.771e-03, -8.827e-03, 9.789e-03, -1.025e-03, 1.134e-02, 5.888e-02, -1.061e-03, -5.843e-02, -2.435e-02) * s0_0_0;
	r0 += M4(6.365e-03, 3.170e-03, -5.134e-02, 5.126e-03, -8.309e-02, 1.537e-01, -3.873e-03, -3.406e-02, 4.405e-03, 2.942e-02, -3.787e-02, 1.567e-01, 2.622e-02, -8.447e-02, 9.802e-02, -2.210e-01) * s0_0_1;
	r1 += M4(-4.508e-02, 2.052e-01, 1.116e-01, -2.501e-02, 1.730e-01, -5.258e-02, 2.745e-02, 6.155e-02, 2.134e-01, 4.209e-02, 8.785e-02, -8.379e-02, -6.358e-02, -5.337e-02, -8.183e-02, 1.356e-01) * s0_0_1;
	r2 += M4(1.539e-01, 2.415e-02, 1.286e-02, -5.909e-02, -3.223e-02, 1.433e-02, -4.238e-03, -1.088e-02, 2.751e-01, -8.502e-02, 5.828e-02, -2.378e-02, 1.463e-02, 2.138e-02, -9.394e-02, -1.217e-01) * s0_0_1;
	r0 += M4(3.674e-02, 1.840e-01, -2.525e-02, -4.257e-02, -2.276e-02, -6.213e-02, 2.419e-02, 2.596e-02, 1.478e-02, 1.806e-01, 1.501e-02, 6.521e-02, -1.482e-02, -1.129e-01, -2.090e-03, -9.541e-02) * s0_0_2;
	r1 += M4(1.628e-01, 1.137e-01, 4.179e-02, -8.001e-02, 4.651e-02, -1.969e-01, 2.581e-02, -6.242e-02, 1.255e-01, -9.413e-03, 1.730e-01, 4.987e-02, -9.636e-02, -1.291e-01, -2.451e-01, -3.060e-02) * s0_0_2;
	r2 += M4(-8.528e-02, 8.652e-02, 1.947e-02, 2.857e-02, 1.021e-01, -6.987e-02, -5.520e-02, 1.057e-01, 7.226e-02, 9.091e-03, 1.617e-02, -5.743e-02, -7.863e-02, 5.367e-02, -4.756e-02, 1.301e-01) * s0_0_2;
	r0 += M4(6.518e-02, -4.685e-03, 6.736e-03, 2.286e-02, 1.880e-02, -5.884e-02, 1.607e-02, -2.749e-02, -2.026e-04, 9.086e-02, 1.428e-01, 1.859e-01, -1.523e-02, 3.603e-02, -4.705e-02, -7.968e-02) * s0_1_0;
	r1 += M4(4.067e-02, 1.944e-01, 2.359e-01, -2.283e-02, -9.651e-02, -2.973e-01, -2.126e-01, 6.557e-02, 1.090e-01, 8.889e-02, 9.572e-02, -9.844e-02, -1.473e-02, 1.672e-02, -5.233e-02, -1.381e-02) * s0_1_0;
	r2 += M4(9.153e-02, -2.302e-02, -2.630e-01, 6.432e-02, 6.314e-02, 1.029e-01, -3.724e-02, -2.309e-01, -3.386e-02, -4.921e-02, -8.205e-02, 6.763e-02, 5.746e-02, 4.161e-02, -6.639e-02, -2.681e-02) * s0_1_0;
	r0 += M4(3.820e-02, -1.368e-02, -1.192e-02, 7.838e-02, 1.034e-01, 3.912e-01, -1.493e-01, 1.292e-02, 1.253e-02, -3.526e-01, -1.219e-01, -3.430e-02, 6.175e-03, 7.628e-03, -5.245e-01, -5.453e-01) * s0_1_1;
	r1 += M4(-1.158e-01, 3.137e-01, 3.347e-02, -1.035e-01, 2.324e-01, -1.958e-04, -2.103e-01, -1.850e-02, 1.156e-01, -2.250e-01, -2.826e-01, 7.042e-02, -3.018e-01, -9.895e-02, 5.899e-02, -1.685e-01) * s0_1_1;
	r2 += M4(2.099e-01, 5.768e-02, -6.569e-02, -2.390e-01, -1.017e-01, -6.341e-02, -1.351e-01, 3.277e-01, 2.782e-01, -4.490e-01, -3.140e-02, -3.535e-01, 9.973e-02, 9.573e-02, -5.064e-01, -1.353e-01) * s0_1_1;
	r0 += M4(9.471e-02, 4.798e-02, -4.192e-02, -8.401e-02, 9.868e-03, -1.875e-01, 7.133e-02, 2.120e-01, 7.782e-02, -1.598e-01, -2.753e-02, -9.279e-02, -1.054e-02, 1.932e-02, 5.822e-02, -4.387e-02) * s0_1_2;
	r1 += M4(4.881e-02, 5.639e-02, 3.918e-02, -9.908e-02, -1.014e-01, -3.085e-01, -4.021e-02, 2.696e-02, 8.175e-02, 6.478e-02, -2.548e-01, 1.449e-03, 1.067e-01, -1.245e-02, -9.559e-03, 1.665e-01) * s0_1_2;
	r2 += M4(-4.711e-01, 3.471e-02, 1.550e-02, 1.219e-01, 3.789e-01, 2.734e-02, -5.312e-02, 2.625e-01, 7.046e-02, -2.196e-01, -4.067e-03, -8.964e-03, 1.140e-01, 2.280e-01, -7.434e-02, 3.837e-01) * s0_1_2;
	r0 += M4(2.506e-02, 5.232e-02, -1.505e-01, -1.849e-02, 4.206e-02, 1.187e-02, -1.652e-01, -4.197e-02, 8.620e-02, 4.559e-02, 4.799e-02, 1.347e-01, -1.835e-02, -1.377e-02, -2.413e-02, -2.333e-02) * s0_2_0;
	r1 += M4(4.211e-02, 8.227e-02, 1.897e-01, -4.653e-02, -8.225e-02, -3.428e-01, 7.297e-03, -7.447e-02, -4.259e-02, -7.351e-02, -2.367e-01, 9.672e-03, -4.796e-02, -7.966e-02, 3.892e-02, 1.923e-03) * s0_2_0;
	r2 += M4(6.624e-02, -4.935e-02, 1.876e-02, -4.484e-02, -4.365e-02, 3.852e-03, 4.637e-02, -2.007e-01, -1.539e-02, -5.289e-02, -1.005e-02, 3.042e-02, 2.001e-02, 2.366e-02, -9.969e-03, -2.204e-02) * s0_2_0;
	r0 += M4(2.750e-02, 2.155e-02, -1.727e-01, -5.583e-02, -2.831e-01, 7.063e-02, 2.481e-01, 1.560e-01, 7.497e-02, -5.244e-02, -5.137e-02, 7.350e-02, 1.404e-02, -8.471e-03, 1.057e-01, 1.543e-02) * s0_2_1;
	r1 += M4(7.712e-02, 1.794e-01, 2.547e-01, -4.106e-02, 1.596e-01, -2.082e-01, 1.460e-02, 3.117e-02, 1.387e-01, 8.012e-02, -1.000e+00, -8.899e-02, 6.465e-02, -1.841e-01, 1.887e-01, 6.896e-02) * s0_2_1;
	r2 += M4(9.430e-02, 8.992e-02, 2.329e-01, 8.793e-02, 8.179e-02, 8.245e-02, -1.763e-01, 1.422e-01, 8.981e-02, -2.093e-01, -4.364e-02, -5.130e-02, -3.809e-02, 6.954e-02, 2.836e-02, -5.707e-02) * s0_2_1;
	r0 += M4(-1.688e-02, 6.715e-03, -2.218e-02, -1.013e-01, -4.390e-02, -5.342e-02, 1.335e-02, 4.729e-02, 1.396e-01, -5.102e-03, -1.806e-02, 7.751e-03, -9.589e-02, -2.587e-02, 5.583e-02, -6.477e-02) * s0_2_2;
	r1 += M4(6.149e-02, 7.766e-02, -1.941e-02, -5.727e-03, 1.603e-01, -9.631e-02, -2.058e-01, 3.020e-02, -1.670e-02, 5.436e-02, -4.650e-01, -6.086e-02, -3.878e-02, -2.323e-01, -1.249e-02, -2.241e-02) * s0_2_2;
	r2 += M4(-1.743e-02, -8.055e-02, -2.653e-02, 1.550e-03, -2.426e-01, 2.204e-01, -7.337e-02, 7.432e-02, -2.111e-02, -7.345e-02, 8.483e-03, -1.007e-01, -1.001e-01, 1.860e-01, -1.705e-02, 6.418e-02) * s0_2_2;
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_3x12_SOFT] -conv3
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND conv2
//!BIND LUMA
//!SAVE conv3
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Shader body of the conv3 pass: reads the "conv2" texture and writes three
// 4-component texels per invocation through imageStore.
// Request 16-bit float arithmetic types; when the extension macro is not
// defined, V4/M4/F fall back to the 32-bit vec4/mat4/float types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): fetch one texel of conv2 for the position pos + (x, y),
// scaled by conv2_mul and converted to V4. The position is clamped to
// [0, sz] first, so neighbours outside that range repeat the edge texel.
// The clamped x is multiplied by 3 and offset by 0, 1 or 2, i.e. the three
// texels belonging to one position sit side by side in conv2.
// These macros expect `pos` and `sz` to be in scope where they are expanded.
#define l0(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Work-group tile cache: three planes (one per l0/l1/l2) of 10x10 texels,
// which is an 8x8 tile plus a one-texel border on every side.
shared V4 G[3][10][10];
// Entry point of the pass. Each invocation:
//   1. helps fill the shared tile G from conv2,
//   2. accumulates a 3x3 neighbourhood of all three planes into r0, r1, r2
//      using constant 4x4 matrices,
//   3. clamps the accumulators to [0, 1] and stores them at three
//      horizontally adjacent texels of out_image.
// NOTE(review): out_image, conv2_raw, conv2_mul and LUMA_size are not declared
// in this file chunk; presumably the host (mpv) provides them - confirm.
void hook() {
	// xy: position of this invocation inside its work group.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	// pos: position handled by this invocation (work groups advance in 8x8 steps).
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// opos: first of the three output texels written by this invocation.
	ivec2 opos = pos * ivec2(3, 1);
	// sz: largest valid coordinate, used as the clamp limit in l0/l1/l2.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative load of the 10x10 tile. x and y take the values 0 and 8, so
	// an invocation loads up to four cells per plane; cells whose index would
	// be 10 or more are skipped. The -1 offset places the tile origin at
	// G[.][1][1], leaving row/column 0 for the top/left border.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// Wait for the whole work group before reading cells written by other invocations.
	barrier();
	// sP_R_C holds the sample at row R, column C of the 3x3 window; s0_* is
	// loaded from plane 0 here and reused for plane 2 further down, s1_* is
	// loaded from plane 1.
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	// r0, r1, r2: accumulators for the three output texels.
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Plane 0 contributions: for every window position, one constant 4x4 matrix
	// per accumulator is multiplied with the sample and added.
	// (The constants are presumably the trained network weights - generated data, do not edit by hand.)
	r0 += M4(4.942e-03, 2.067e-02, -3.711e-02, 2.233e-02, -1.057e-02, 4.952e-03, -4.034e-03, -9.873e-03, 4.234e-04, -1.246e-02, 4.034e-03, 6.164e-03, -1.984e-03, -1.423e-02, -1.043e-02, -1.637e-03) * s0_0_0;
	r1 += M4(-1.447e-01, 9.965e-05, 6.708e-02, -2.899e-02, 6.951e-03, 6.303e-03, -2.123e-02, 8.745e-03, -1.384e-02, -1.330e-05, 1.039e-03, 3.147e-03, 9.423e-02, -1.924e-03, -2.687e-02, -2.687e-02) * s0_0_0;
	r2 += M4(-1.000e+00, 2.507e-02, -2.347e-01, 4.039e-03, -1.000e+00, -5.609e-02, -2.068e-01, 2.911e-02, -1.000e+00, -9.938e-03, 1.995e-01, 1.436e-02, -1.000e+00, -1.131e-01, -1.000e+00, -4.207e-02) * s0_0_0;
	r0 += M4(1.866e-02, -1.548e-01, -9.232e-02, -1.199e-02, -1.103e-03, -6.423e-03, 1.013e-01, 5.024e-03, -2.314e-03, -3.257e-02, 2.757e-02, 1.166e-02, 2.154e-02, -7.077e-02, -5.550e-02, -3.509e-03) * s0_0_1;
	r1 += M4(-9.689e-02, 1.446e-03, -2.883e-01, -3.937e-02, -6.645e-02, -2.018e-02, 4.544e-02, 2.499e-03, -3.227e-01, -1.541e-02, 1.210e-01, -6.531e-02, 3.289e-02, -1.937e-02, -5.139e-02, -1.155e-02) * s0_0_1;
	r2 += M4(-1.000e+00, -1.474e-01, -3.241e-02, -1.149e-01, -1.001e-01, 2.536e-02, 9.380e-02, 8.229e-03, -1.000e+00, 5.047e-03, 4.396e-01, -2.486e-02, -1.000e+00, -3.610e-02, -5.575e-02, 4.947e-02) * s0_0_1;
	r0 += M4(2.321e-02, 8.252e-02, -4.816e-02, 3.011e-02, -1.740e-02, -1.967e-02, -9.224e-03, 3.922e-03, -2.098e-02, -6.069e-02, -6.864e-02, -4.213e-02, -1.902e-02, -4.465e-03, 2.342e-03, 4.387e-03) * s0_0_2;
	r1 += M4(4.149e-02, 1.617e-02, 4.532e-02, 5.229e-02, 4.922e-02, 2.160e-02, 7.352e-03, 6.876e-03, 1.444e-01, -4.161e-02, 2.720e-02, -1.205e-02, -1.038e-01, 2.394e-02, 5.056e-03, -8.090e-03) * s0_0_2;
	r2 += M4(4.658e-02, -1.524e-02, 7.526e-02, -8.177e-02, -5.662e-02, 3.060e-02, 3.052e-02, -2.464e-02, 4.295e-02, -2.162e-02, -2.097e-01, -2.397e-01, -1.461e-01, -2.693e-02, -9.477e-02, -9.573e-02) * s0_0_2;
	r0 += M4(3.099e-02, 3.513e-02, 1.978e-02, 5.288e-02, 3.433e-02, -6.075e-03, 5.421e-03, 2.738e-02, -1.417e-02, 4.545e-02, -1.627e-02, -2.458e-04, 1.504e-02, -6.740e-02, 5.407e-03, -2.964e-02) * s0_1_0;
	r1 += M4(6.015e-02, -2.608e-02, 1.516e-01, -7.360e-02, 1.305e-01, -1.450e-03, -6.114e-02, -6.891e-03, -1.109e-01, -1.257e-03, -4.739e-02, 4.008e-02, 4.311e-02, -2.848e-03, -1.206e-01, -5.942e-02) * s0_1_0;
	r2 += M4(-1.552e-01, -5.530e-03, 3.542e-01, 2.889e-02, -5.480e-01, 2.251e-03, -6.101e-01, 6.203e-02, 1.602e-01, -4.572e-02, -3.263e-02, -4.825e-03, 3.993e-02, 6.495e-02, -2.808e-01, 4.459e-02) * s0_1_0;
	r0 += M4(3.489e-01, 6.126e-01, -2.909e-03, 3.486e-01, 1.568e-01, 1.812e-01, 3.604e-01, 1.577e-01, 2.419e-01, 1.124e-01, -1.574e-02, 1.098e-01, -2.582e-01, 1.282e-01, -1.229e-02, -7.352e-02) * s0_1_1;
	r1 += M4(5.295e-01, 1.132e-01, 5.647e-01, -3.940e-01, 1.509e-01, -2.066e-02, 3.615e-01, -1.831e-01, 4.972e-01, -6.823e-02, 4.833e-01, -6.645e-02, -9.981e-01, 3.238e-02, -5.354e-01, -1.335e-01) * s0_1_1;
	r2 += M4(-3.435e-02, 2.125e-01, 3.252e-02, 2.103e-01, 3.279e-01, 5.219e-01, 1.003e-01, 1.270e-02, 1.900e-01, 2.592e-01, -3.415e-01, -5.746e-02, -5.268e-01, -1.300e-01, -3.459e-01, -3.084e-01) * s0_1_1;
	r0 += M4(-1.738e-02, 6.878e-02, 3.157e-02, -6.790e-02, -5.192e-02, -3.640e-02, -3.623e-02, -1.725e-02, -4.719e-02, 1.367e-01, 7.152e-02, 3.377e-01, 5.616e-02, -8.505e-02, 1.767e-02, -1.378e-02) * s0_1_2;
	r1 += M4(1.534e-01, -2.402e-01, 6.564e-02, 1.594e-01, 1.370e-01, -9.514e-04, 4.745e-03, -2.407e-02, 7.867e-02, 5.367e-02, 1.871e-01, 9.446e-02, 6.431e-02, -8.471e-02, 1.304e-02, -6.719e-03) * s0_1_2;
	r2 += M4(2.680e-02, -2.241e-02, -4.825e-02, 3.470e-01, -2.657e-02, 1.272e-01, 1.490e-01, -1.362e-02, 4.349e-02, 1.043e-01, 3.986e-02, 4.673e-01, 2.456e-02, -1.634e-01, -1.364e-01, -8.419e-02) * s0_1_2;
	r0 += M4(-4.494e-03, 3.566e-03, 9.788e-03, 1.825e-02, -1.136e-01, -6.891e-02, -5.489e-03, -6.519e-02, -1.389e-02, -6.362e-03, 9.052e-04, -1.281e-02, -3.069e-02, -2.023e-02, -7.095e-03, -1.010e-02) * s0_2_0;
	r1 += M4(-9.180e-03, -3.967e-03, 7.984e-02, 7.929e-04, -2.998e-03, 4.592e-02, -2.059e-01, 4.677e-02, -3.461e-03, -1.047e-03, 4.863e-02, 1.283e-02, -6.654e-02, 7.159e-04, -3.626e-02, -3.541e-02) * s0_2_0;
	r2 += M4(-5.285e-03, 4.478e-02, 1.099e-01, 2.997e-03, 2.084e-01, -5.824e-02, -1.521e-01, -1.654e-02, -8.646e-03, 1.456e-02, 1.390e-01, -1.333e-02, -8.996e-02, -3.048e-02, -2.666e-01, -1.047e-02) * s0_2_0;
	r0 += M4(-5.680e-02, 4.725e-02, -3.449e-03, -2.721e-02, 4.080e-01, -3.520e-02, 6.894e-02, 3.418e-01, -1.035e-01, -9.850e-02, -1.067e-02, -9.497e-02, 4.734e-02, -4.784e-02, 1.821e-02, 3.529e-02) * s0_2_1;
	r1 += M4(5.547e-02, 2.619e-03, -7.460e-02, 6.790e-02, 4.346e-03, 3.701e-02, 2.783e-01, 4.170e-01, -9.662e-02, 4.606e-02, -1.735e-01, 6.730e-02, 3.552e-02, -8.869e-02, -5.802e-02, -5.220e-01) * s0_2_1;
	r2 += M4(6.503e-02, 4.594e-02, 1.578e-01, 8.230e-03, -3.810e-02, 3.016e-01, 3.642e-02, -6.884e-03, -8.892e-03, -1.060e-01, 1.996e-01, 4.304e-03, -4.527e-02, -1.341e-01, -2.142e-01, -4.816e-02) * s0_2_1;
	r0 += M4(1.473e-02, 2.129e-02, -4.376e-03, 4.735e-03, -1.983e-01, 6.756e-02, -3.185e-02, -2.349e-01, 1.704e-01, 3.155e-02, 9.355e-03, 1.651e-01, 6.347e-03, -5.479e-03, 3.229e-03, 1.317e-02) * s0_2_2;
	r1 += M4(2.537e-02, 7.646e-02, -1.252e-02, 3.624e-02, 1.077e-01, 4.443e-01, -8.389e-02, 1.103e-01, -9.127e-02, 7.144e-02, 4.252e-02, 1.392e-01, -1.558e-01, -3.956e-01, -1.020e-02, -8.757e-02) * s0_2_2;
	r2 += M4(1.172e-02, -1.011e-02, 1.446e-01, -5.594e-02, 4.819e-02, 3.454e-02, 1.032e-01, -8.645e-02, -6.539e-03, -5.725e-02, 6.345e-02, -2.051e-01, -2.730e-02, 8.546e-02, -2.173e-04, -6.310e-02) * s0_2_2;
	// Plane 1 contributions (samples s1_*).
	r0 += M4(-1.949e-03, -8.463e-03, 2.130e-02, -5.778e-03, 1.783e-02, -1.446e-02, 2.318e-02, 1.757e-02, -3.030e-02, 2.037e-02, -2.761e-03, -8.994e-03, -2.263e-02, 1.207e-02, 8.217e-03, -1.035e-02) * s1_0_0;
	r1 += M4(-4.097e-02, -3.037e-03, -7.544e-03, -4.857e-03, -1.115e-01, -3.294e-03, -2.425e-02, -1.321e-02, 3.265e-02, 2.768e-03, 4.429e-02, -1.095e-02, 3.892e-02, 1.793e-03, 4.620e-02, -1.238e-02) * s1_0_0;
	r2 += M4(-1.000e+00, 3.774e-02, -9.242e-01, -2.699e-02, -1.000e+00, 9.568e-02, 4.628e-02, -1.244e-02, -1.000e+00, -8.301e-02, 3.153e-01, 1.445e-02, -1.000e+00, 6.279e-02, 1.717e-01, 5.590e-03) * s1_0_0;
	r0 += M4(8.893e-03, -1.578e-02, -4.150e-02, 7.420e-03, 1.102e-02, -5.231e-03, -2.569e-01, 8.623e-03, -4.454e-02, -1.249e-02, 1.426e-01, -2.889e-02, -1.915e-02, 3.076e-02, 4.136e-03, -7.681e-03) * s1_0_1;
	r1 += M4(-3.045e-02, 1.094e-02, -1.216e-02, 3.018e-03, -8.106e-02, 2.149e-02, -7.933e-02, 2.862e-02, -5.226e-02, -1.973e-02, 6.912e-02, -1.266e-02, -5.689e-03, -1.360e-02, -1.592e-02, 5.563e-03) * s1_0_1;
	r2 += M4(3.561e-02, -3.531e-03, -6.086e-01, -3.883e-02, -1.000e+00, -2.478e-01, -1.991e-01, -9.126e-02, -9.174e-02, -2.699e-01, -2.413e-02, -5.855e-02, -1.333e-03, 7.179e-03, -7.927e-02, 4.994e-02) * s1_0_1;
	r0 += M4(6.045e-03, 2.462e-02, -6.276e-02, 2.281e-02, 7.280e-02, 1.858e-02, 5.481e-02, 3.482e-02, -9.948e-04, 2.818e-02, 6.355e-02, -7.767e-03, -7.812e-03, -1.371e-02, -3.500e-02, -3.448e-02) * s1_0_2;
	r1 += M4(-5.963e-02, -1.862e-02, -5.563e-02, 2.171e-02, -2.983e-02, -1.750e-02, 6.133e-03, 4.495e-03, -3.333e-02, 3.439e-02, -1.667e-02, -1.077e-02, -3.613e-02, -3.882e-03, -1.214e-02, 4.685e-03) * s1_0_2;
	r2 += M4(1.899e-01, 3.175e-02, -3.122e-01, -7.344e-02, 2.103e-01, 4.084e-03, -7.992e-02, 6.136e-02, -5.855e-02, 7.427e-03, 6.255e-02, 9.404e-02, -1.274e-01, -3.689e-03, 8.075e-03, -5.354e-02) * s1_0_2;
	r0 += M4(1.591e-02, 8.037e-02, 1.614e-02, 9.541e-03, -5.861e-03, -2.464e-02, 9.445e-03, 1.061e-02, 2.078e-02, 1.640e-02, 1.431e-02, 1.213e-03, 3.438e-02, -1.985e-02, 5.706e-03, -1.655e-02) * s1_1_0;
	r1 += M4(7.300e-02, -9.112e-03, 1.075e-01, -2.902e-03, -1.314e-01, -4.719e-04, -3.903e-02, -3.601e-02, -3.619e-02, -1.796e-03, -1.516e-01, -1.116e-02, -1.695e-01, 1.016e-02, -7.305e-02, 6.492e-02) * s1_1_0;
	r2 += M4(-5.067e-02, 1.215e-01, -5.484e-01, -2.155e-02, -1.000e+00, -2.400e-01, -5.202e-01, -7.181e-02, 2.286e-01, 8.088e-02, 2.709e-01, 3.672e-02, -3.372e-01, 4.101e-02, 2.347e-01, -1.844e-02) * s1_1_0;
	r0 += M4(-9.734e-02, -4.179e-01, -2.865e-01, 7.324e-04, -3.154e-01, -6.669e-02, 1.727e-01, -3.133e-01, 4.401e-01, 1.273e-02, -9.205e-02, 2.770e-01, 1.039e-01, -8.128e-02, 3.760e-01, 7.615e-02) * s1_1_1;
	r1 += M4(-5.013e-01, 2.927e-02, -4.795e-01, 6.918e-03, 9.691e-02, 3.348e-02, -3.091e-01, 1.510e-01, -7.987e-02, -6.415e-02, 1.645e-01, -8.535e-02, 6.664e-02, -5.471e-03, -1.184e-01, 6.583e-02) * s1_1_1;
	r2 += M4(-5.513e-01, -4.346e-01, -1.527e-01, 4.914e-02, -3.338e-02, -1.678e-01, 5.067e-01, 1.191e-01, 1.674e-01, 2.026e-01, 3.517e-02, 3.633e-02, -4.235e-02, -1.522e-01, -8.577e-02, 5.195e-02) * s1_1_1;
	r0 += M4(-1.160e-01, -1.049e-01, -4.693e-02, -1.148e-01, 5.613e-02, 3.699e-02, -1.369e-02, 1.106e-01, 6.322e-02, -9.251e-02, -7.330e-02, 1.177e-01, -4.585e-02, 1.270e-02, 1.614e-01, -6.274e-02) * s1_1_2;
	r1 += M4(-1.642e-02, 4.804e-02, -4.552e-02, 5.151e-02, -5.233e-02, -3.009e-01, -4.426e-02, -2.170e-01, -1.379e-02, -2.901e-01, -7.231e-02, -9.249e-02, -2.208e-01, 4.821e-02, -5.043e-02, 2.005e-02) * s1_1_2;
	r2 += M4(-6.521e-02, -1.606e-01, -7.842e-02, -7.835e-01, 1.364e-01, 1.331e-01, -4.575e-02, -1.230e-01, -1.740e-02, -4.764e-02, -3.671e-02, -1.775e-01, -2.574e-02, -7.288e-02, -4.810e-02, -1.195e-01) * s1_1_2;
	r0 += M4(2.963e-02, 2.362e-03, -9.628e-03, -9.468e-03, 5.088e-02, 3.136e-02, -1.650e-02, 1.775e-02, -8.421e-02, -4.965e-02, -5.024e-03, -6.077e-02, 2.563e-03, -3.441e-02, 1.996e-02, 8.349e-03) * s1_2_0;
	r1 += M4(-6.437e-02, 1.674e-03, 3.757e-02, -6.103e-02, -1.959e-02, -2.980e-02, -8.571e-02, -1.616e-01, 5.918e-03, 3.558e-02, 4.453e-02, 4.891e-02, -3.541e-02, 1.234e-02, 6.276e-02, 1.129e-01) * s1_2_0;
	r2 += M4(4.234e-02, 7.599e-02, -9.764e-01, 3.722e-02, 5.135e-02, 2.204e-02, 4.516e-01, -1.005e-02, -6.718e-02, -9.350e-03, -1.259e-01, 2.617e-02, -4.761e-02, 4.549e-03, 3.171e-01, -3.033e-02) * s1_2_0;
	r0 += M4(-2.968e-01, -2.741e-01, 1.164e-02, -3.442e-03, 1.193e-01, 5.861e-02, 4.166e-03, 8.102e-02, -2.671e-01, -1.491e-02, -3.632e-03, -1.184e-01, -2.612e-01, -3.507e-02, -1.474e-01, -1.754e-01) * s1_2_1;
	r1 += M4(-1.737e-01, 2.982e-02, -1.328e-01, -5.254e-01, -1.147e-02, -2.647e-01, -3.341e-02, -2.785e-01, 5.774e-02, 1.096e-01, 7.651e-02, 3.055e-01, -1.578e-01, 2.831e-01, -2.566e-01, 2.206e-01) * s1_2_1;
	r2 += M4(1.035e-01, -1.895e-01, -5.203e-01, -3.170e-02, 3.688e-02, 9.614e-02, -9.237e-02, -5.406e-02, -9.259e-02, -1.101e-01, -7.354e-02, 3.134e-02, 1.217e-01, -1.749e-01, 4.802e-01, 8.652e-03) * s1_2_1;
	r0 += M4(-8.162e-02, -8.566e-02, -1.375e-02, -6.663e-02, -3.427e-02, -1.996e-02, -1.089e-02, -1.081e-02, 1.976e-03, 2.959e-02, 3.202e-02, -9.305e-02, 3.076e-01, 1.881e-03, -1.313e-03, 2.015e-01) * s1_2_2;
	r1 += M4(-1.154e-02, -5.059e-01, -3.502e-04, -5.406e-02, 4.761e-02, 8.048e-02, -2.982e-02, -7.888e-03, 2.780e-02, 2.097e-01, -2.780e-02, 2.639e-02, -7.128e-03, -3.658e-01, -2.475e-02, -2.280e-01) * s1_2_2;
	r2 += M4(-1.048e-02, -1.097e-02, -3.710e-02, -3.887e-01, -2.787e-02, -2.931e-02, -1.495e-01, -8.356e-02, -4.693e-02, 1.982e-02, -1.797e-01, 8.811e-02, 9.693e-02, -3.309e-02, 7.509e-02, -1.261e-01) * s1_2_2;
	// Reuse the s0_* variables for the 3x3 window of plane 2.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	// Plane 2 contributions.
	r0 += M4(1.523e-03, -7.714e-03, 1.356e-02, -6.571e-03, 5.085e-02, -2.654e-02, 3.887e-02, -2.401e-02, -1.428e-02, 2.122e-02, 3.923e-02, -5.418e-03, -5.173e-02, 2.396e-02, -1.304e-01, -1.606e-02) * s0_0_0;
	r1 += M4(8.877e-02, -1.827e-03, 7.677e-03, 1.710e-02, -1.268e-01, -3.285e-03, -5.588e-02, -3.362e-02, -6.856e-02, 6.552e-03, -4.372e-02, 2.939e-02, 3.827e-02, -1.391e-04, 2.054e-02, 9.095e-03) * s0_0_0;
	r2 += M4(-1.000e+00, -3.023e-02, -9.053e-02, 6.961e-03, -1.000e+00, 1.770e-02, -4.240e-02, 3.765e-03, -1.000e+00, -5.615e-02, 3.388e-01, -1.239e-02, -1.000e+00, 6.664e-02, -1.195e-02, -2.433e-03) * s0_0_0;
	r0 += M4(-9.828e-03, 3.942e-03, 3.229e-02, -2.285e-02, 2.093e-02, -1.666e-02, 4.795e-01, 3.577e-03, -8.772e-02, 1.284e-01, 2.589e-01, -2.520e-02, -4.079e-02, 3.622e-02, 1.403e-01, -1.061e-02) * s0_0_1;
	r1 += M4(1.015e-01, 2.229e-03, 5.160e-02, 1.026e-02, 1.193e-01, -1.578e-02, 1.155e-01, 8.765e-02, 2.394e-01, 7.858e-04, 2.919e-01, 3.502e-02, 2.524e-02, -3.755e-02, 2.660e-02, -2.261e-02) * s0_0_1;
	r2 += M4(-1.653e-01, 1.290e-02, 2.091e-02, 5.932e-02, -7.375e-01, -7.154e-02, -2.879e-02, -3.189e-02, -1.000e+00, -4.813e-02, 1.494e-01, 5.854e-02, -2.757e-01, 1.114e-02, 3.796e-02, -3.960e-02) * s0_0_1;
	r0 += M4(-2.971e-05, 1.087e-03, 1.311e-02, -1.422e-03, -4.766e-03, -3.560e-03, -5.021e-02, 4.806e-03, -8.054e-02, 1.390e-02, -4.044e-02, -1.166e-01, -1.324e-02, -1.800e-02, -9.672e-03, -2.095e-02) * s0_0_2;
	r1 += M4(6.585e-03, -4.054e-03, 2.162e-02, -1.648e-02, 1.540e-02, -1.321e-03, 1.655e-02, 8.510e-04, 8.825e-02, 1.586e-02, 2.992e-02, 3.939e-02, 2.318e-02, 4.667e-04, -1.235e-02, -1.464e-02) * s0_0_2;
	r2 += M4(1.805e-02, -7.340e-03, -3.447e-02, -2.919e-03, 3.910e-02, 5.200e-03, -1.134e-01, 3.602e-02, -6.114e-02, 5.190e-02, -1.281e-01, 2.752e-01, -5.210e-02, -9.890e-03, -1.626e-02, 1.676e-02) * s0_0_2;
	r0 += M4(-2.940e-02, -7.226e-02, -4.775e-02, -2.205e-02, 1.423e-01, -5.532e-02, 7.342e-02, 1.162e-01, 4.644e-02, 6.109e-02, -2.503e-02, 4.227e-02, -2.690e-01, 9.479e-02, -9.797e-02, -5.895e-02) * s0_1_0;
	r1 += M4(-2.933e-01, -1.186e-02, -2.314e-01, 1.872e-02, -2.603e-01, 5.417e-03, 5.688e-02, 7.456e-02, -1.770e-02, -3.686e-02, 2.399e-03, -7.934e-02, 4.072e-01, 2.289e-02, -2.096e-01, 5.066e-02) * s0_1_0;
	r2 += M4(-4.232e-02, -1.184e-01, -5.035e-01, -3.893e-02, 1.451e-01, -7.633e-02, 5.020e-01, 3.429e-02, -3.700e-02, -7.827e-03, -5.952e-01, -1.735e-02, 2.185e-01, -3.625e-01, -6.549e-01, -2.425e-02) * s0_1_0;
	r0 += M4(-1.149e-01, 3.536e-02, -3.947e-01, -9.402e-02, 5.563e-01, 6.914e-01, 2.720e-02, -1.047e-01, 6.188e-01, 7.366e-02, -5.887e-02, 2.597e-01, 4.189e-01, 5.924e-02, 3.042e-02, 3.108e-01) * s0_1_1;
	r1 += M4(6.833e-02, -3.829e-02, -1.711e-01, 4.322e-02, 4.203e-01, 2.501e-01, 5.946e-02, 1.029e-01, -4.073e-01, -1.352e-01, -6.463e-01, 5.140e-02, -1.899e-01, -6.775e-02, 1.224e-01, -2.418e-02) * s0_1_1;
	r2 += M4(-2.448e-02, 2.082e-01, -1.859e-01, -1.774e-01, 7.022e-02, 1.606e-01, 8.021e-02, -2.144e-01, 1.063e-01, -1.300e-01, -1.467e-01, -1.884e-02, -1.567e-01, 1.041e-01, -2.410e-02, 4.873e-01) * s0_1_1;
	r0 += M4(2.961e-02, 1.037e-03, 3.102e-02, 7.597e-03, -1.073e-01, 3.193e-02, -3.562e-03, -5.635e-02, -2.460e-02, -6.017e-02, -3.964e-02, 8.764e-02, 1.811e-03, -2.648e-02, -1.452e-02, 7.791e-04) * s0_1_2;
	r1 += M4(2.170e-02, 2.841e-02, 3.106e-02, -1.637e-02, 1.522e-02, -5.866e-02, -9.920e-03, 1.068e-01, -1.133e-01, 1.808e-01, -1.516e-02, 1.893e-02, -4.945e-03, 4.521e-02, 1.341e-02, -1.599e-02) * s0_1_2;
	r2 += M4(2.437e-02, 8.708e-02, -1.071e-01, -2.279e-02, -1.621e-02, 2.122e-02, 2.106e-02, -5.091e-02, -7.399e-02, 3.073e-03, 1.239e-01, -5.048e-01, 9.725e-03, 1.450e-02, 5.982e-02, 3.263e-02) * s0_1_2;
	r0 += M4(-1.099e-01, -1.108e-01, 3.081e-03, -1.382e-01, -4.504e-03, -1.108e-02, -1.047e-02, -2.640e-02, -2.361e-02, 1.503e-04, -9.261e-03, -2.846e-02, -7.291e-02, 1.198e-02, -1.394e-02, -2.485e-02) * s0_2_0;
	r1 += M4(1.024e-01, 3.238e-02, -1.656e-01, -4.504e-02, -7.104e-02, -3.342e-02, 4.172e-02, -1.919e-01, 4.698e-02, 1.023e-02, -8.132e-02, -8.379e-03, 6.669e-02, 1.171e-02, 2.050e-02, 1.555e-01) * s0_2_0;
	r2 += M4(-3.088e-01, -9.593e-02, -9.672e-02, -1.292e-02, -5.875e-02, -4.921e-02, 2.056e-01, -1.604e-03, -2.124e-02, -3.941e-02, -1.260e-01, -9.489e-03, -1.486e-01, 9.352e-02, -7.396e-02, 2.337e-02) * s0_2_0;
	r0 += M4(-4.895e-01, 4.635e-02, 8.976e-03, -5.254e-01, 8.978e-02, 8.229e-02, 9.995e-04, 2.520e-02, -4.022e-03, -5.403e-02, 1.741e-02, -3.587e-02, -4.010e-02, -5.730e-02, -8.679e-03, -1.002e-01) * s0_2_1;
	r1 += M4(1.483e-01, -5.069e-02, -3.573e-02, 2.822e-01, 3.954e-02, -3.467e-01, -1.979e-02, 3.591e-01, -6.502e-02, -1.176e-02, 7.560e-02, -1.602e-02, 1.746e-02, 2.166e-01, -2.620e-02, -1.672e-01) * s0_2_1;
	r2 += M4(-4.002e-03, -1.315e-01, -3.702e-01, 6.687e-03, -1.555e-02, -2.646e-03, 2.818e-01, 1.262e-02, -3.130e-02, -1.363e-02, -1.756e-01, 4.932e-02, 2.312e-02, -2.424e-03, -1.307e-01, 8.012e-03) * s0_2_1;
	r0 += M4(2.917e-02, -4.144e-02, 6.659e-03, -3.184e-02, 2.427e-02, 1.311e-02, 7.157e-03, 1.397e-02, -4.243e-02, -7.759e-03, -6.281e-04, -3.075e-02, -2.698e-02, 1.182e-02, 3.748e-03, -3.314e-03) * s0_2_2;
	r1 += M4(-4.350e-02, 1.069e-01, 1.912e-02, -5.543e-02, 2.762e-02, 7.148e-02, 2.177e-02, 8.732e-03, -9.972e-03, -1.023e-01, 2.344e-03, -7.249e-02, 4.277e-04, 3.140e-02, 3.308e-03, 1.029e-02) * s0_2_2;
	r2 += M4(4.847e-03, -4.588e-02, -1.185e-01, 1.703e-02, 2.999e-02, -7.805e-03, -1.450e-02, -3.126e-02, 1.125e-02, 7.471e-03, -1.051e-01, 7.549e-02, -4.553e-03, 1.877e-02, -3.491e-02, -1.067e-02) * s0_2_2;
	// Clamp each accumulator to [0, 1] and write it, widened to vec4, to the
	// three adjacent output texels starting at opos.
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_3x12_SOFT] -out-shuffle
//!HOOK LUMA
//!COMPUTE 16 16 8 8
//!BIND conv3
//!BIND LUMA
//!WIDTH LUMA.w 2 *
//!HEIGHT LUMA.h 2 *
//!COMPONENTS 1
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Shader body of the out-shuffle pass, which reads the "conv3" texture.
// Request 16-bit float arithmetic types; when the extension macro is not
// defined, V4/M4/F fall back to the 32-bit vec4/mat4/float types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): fetch one texel of conv3 for the position pos + (x, y),
// scaled by conv3_mul and converted to V4. The position is clamped to
// [0, sz] first, so neighbours outside that range repeat the edge texel.
// The clamped x is multiplied by 3 and offset by 0, 1 or 2, i.e. the three
// texels belonging to one position sit side by side in conv3.
// These macros expect `pos` and `sz` to be in scope where they are expanded.
#define l0(x, y) V4((conv3_mul * texelFetch(conv3_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv3_mul * texelFetch(conv3_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv3_mul * texelFetch(conv3_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Work-group tile cache: three planes (one per l0/l1/l2) of 10x10 texels.
shared V4 G[3][10][10];
// Final pass: combines the three conv3 planes through a 3x3 window of constant
// M4 weights into one V4 per input position, adds a constant bias, and writes
// the four components as a 2x2 block of the output image, each added to a
// sample of LUMA taken at the matching output position.
void hook() {
	// xy: invocation index inside the workgroup; pos: position handled by this
	// invocation (workgroups advance in steps of 8x8); opos: top-left corner
	// of the 2x2 output block written for pos.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(2, 2);
	// Largest valid coordinate, used by the l0/l1/l2 macros for clamping.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative fill of the 10x10 shared tile. x and y take the values 0
	// and 8, so each invocation writes up to four entries and skips any tile
	// index >= 10. The (x - 1, y - 1) offset makes tile entry (ay, ax) hold
	// the texel one row/column before the workgroup's first position, i.e.
	// the tile carries a one-texel border.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// All tile writes must be complete before any invocation reads neighbours.
	barrier();
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0;
	r0 = V4(0.0);
	// Load the 3x3 neighbourhood around this invocation from planes 0 (s0_*)
	// and 1 (s1_*); the name suffix is _row_column within the window.
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Accumulate weight-matrix * tap for every tap of planes 0 and 1.
	r0 += M4(5.536e-02, -4.016e-03, 2.585e-03, -5.604e-04, 5.359e-02, -1.904e-03, -4.353e-03, -8.628e-04, -1.334e-05, 1.528e-03, 9.130e-04, 3.965e-04, -1.274e-01, -9.920e-03, -1.558e-03, 1.109e-03) * s0_0_0;
	r0 += M4(1.429e-01, 2.105e-02, -2.658e-03, 1.064e-03, 1.624e-01, 1.890e-01, 5.589e-04, -8.300e-03, -1.184e-03, -6.996e-03, -5.439e-04, -2.965e-04, 3.534e-03, 1.316e-01, 4.101e-03, 1.396e-02) * s0_0_1;
	r0 += M4(-2.898e-04, 7.801e-03, -5.162e-04, -4.971e-03, -6.050e-03, 1.224e-03, 1.609e-03, -1.689e-03, 2.287e-04, 2.282e-03, -2.525e-05, 1.315e-04, 1.248e-04, 1.197e-03, 1.381e-04, 1.832e-03) * s0_0_2;
	r0 += M4(1.166e-01, 2.748e-03, 6.888e-02, 1.613e-03, 1.431e-01, -1.061e-02, 1.703e-01, -8.430e-03, -3.956e-02, 1.436e-02, -7.332e-04, 3.803e-03, -2.992e-01, 8.243e-03, -4.720e-01, -1.308e-02) * s0_1_0;
	r0 += M4(-1.267e-02, -2.034e-01, -1.401e-01, -8.315e-01, 1.610e-03, 1.890e-02, -1.588e-02, 4.589e-01, -3.740e-01, -4.455e-01, 1.833e-03, -1.304e-02, -4.794e-03, 2.627e-01, 5.121e-03, 3.089e-01) * s0_1_1;
	r0 += M4(8.917e-04, -9.259e-03, -3.589e-03, -2.651e-02, -3.803e-03, 1.419e-02, -9.648e-03, -2.613e-02, 7.094e-03, -1.163e-02, -1.082e-03, 7.805e-03, -1.929e-04, -4.041e-03, 1.102e-04, -2.750e-03) * s0_1_2;
	r0 += M4(1.589e-03, 6.034e-04, -2.218e-02, -2.793e-03, -1.781e-03, 4.077e-04, -1.943e-04, -6.053e-03, -4.239e-03, -1.245e-03, 1.460e-01, 1.470e-02, -4.503e-04, -1.908e-03, 3.483e-02, 1.103e-02) * s0_2_0;
	r0 += M4(8.837e-04, -5.246e-04, -3.600e-03, -3.899e-02, -1.353e-03, -5.442e-03, 1.784e-02, -1.268e-02, 2.917e-04, 3.233e-03, 2.026e-01, 2.789e-01, 4.183e-04, 6.724e-03, -1.669e-02, 6.788e-02) * s0_2_1;
	r0 += M4(2.707e-04, -1.812e-03, 4.570e-04, -4.994e-03, -3.174e-04, 4.956e-04, -6.908e-04, 5.812e-03, 2.928e-03, -4.311e-03, 1.661e-02, 7.037e-02, 6.701e-05, 2.791e-03, -2.448e-04, 8.622e-04) * s0_2_2;
	r0 += M4(2.473e-03, 1.688e-04, 6.076e-04, 2.107e-04, 6.776e-01, -3.207e-01, -3.948e-01, -1.762e-02, -1.305e-02, 1.658e-03, 3.043e-03, 1.161e-03, 4.288e-02, 1.121e-03, 3.547e-02, 3.312e-04) * s1_0_0;
	r0 += M4(1.611e-03, 3.590e-02, 1.813e-02, 1.043e-02, -1.250e-02, 4.802e-03, 7.519e-03, -7.370e-02, 4.907e-02, 3.992e-02, -6.265e-03, -2.309e-03, -1.767e-01, 6.163e-01, -2.640e-01, -2.668e-03) * s1_0_1;
	r0 += M4(4.046e-03, -3.551e-02, 1.189e-03, 2.776e-04, 6.009e-05, -4.936e-04, -1.673e-04, -7.570e-04, 4.939e-04, 3.102e-02, -6.439e-04, 4.612e-04, 1.237e-02, -8.277e-02, -8.561e-03, -1.073e-01) * s1_0_2;
	r0 += M4(1.381e-02, 6.220e-03, 7.448e-03, 5.965e-03, -6.904e-03, 1.199e-03, 1.292e-02, -1.011e-01, -5.317e-02, -4.654e-03, -3.493e-02, -4.372e-04, 3.622e-04, -4.991e-04, 3.478e-02, 1.374e-03) * s1_1_0;
	r0 += M4(-4.287e-01, -1.516e-01, -2.309e-01, 3.710e-02, -2.734e-03, -4.113e-03, -4.476e-03, -3.980e-02, 1.792e-01, -5.059e-01, 4.834e-01, 2.061e-02, 1.111e-02, 1.884e-03, 6.519e-02, 1.851e-01) * s1_1_1;
	r0 += M4(5.946e-03, -7.980e-02, 8.023e-03, -5.669e-02, -1.352e-04, -1.234e-03, 2.979e-05, -8.102e-04, 3.982e-04, 9.377e-02, -1.470e-03, 1.101e-01, 1.587e-03, 2.382e-03, 1.412e-02, 2.295e-02) * s1_1_2;
	r0 += M4(9.474e-04, 2.385e-04, 5.123e-03, 2.388e-03, -2.845e-05, -3.780e-06, 3.829e-04, -4.331e-04, 4.010e-03, 9.158e-04, -5.717e-02, -4.495e-03, -1.511e-09, 3.279e-07, -7.790e-05, 1.333e-06) * s1_2_0;
	r0 += M4(3.589e-03, 7.103e-03, -1.091e-01, -3.753e-02, -1.133e-06, -7.744e-05, 2.463e-04, 1.461e-04, -6.165e-03, 2.632e-03, -1.947e-02, -1.538e-01, 2.204e-05, 8.564e-06, -7.625e-04, 9.649e-05) * s1_2_1;
	r0 += M4(7.097e-04, -4.438e-03, 6.621e-04, -3.779e-02, -5.328e-08, 1.141e-06, -7.899e-08, 3.284e-05, -1.033e-03, -1.219e-03, -8.548e-03, 5.265e-03, 6.527e-07, 5.636e-05, -1.795e-04, -3.353e-04) * s1_2_2;
	// The s0_* variables are reused here for the 3x3 neighbourhood of plane 2.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	r0 += M4(9.892e-03, 3.307e-03, 4.081e-03, -2.847e-03, -1.413e-02, -1.166e-03, 9.904e-04, -8.748e-04, -7.642e-03, 2.281e-04, -1.446e-02, -8.679e-03, 4.054e-02, -3.649e-02, -1.417e-02, -4.728e-03) * s0_0_0;
	r0 += M4(-1.361e-01, 1.022e-02, 2.567e-02, 3.283e-02, -7.628e-02, -6.897e-02, -3.096e-03, -1.908e-03, -3.357e-02, 3.973e-03, 3.743e-03, 6.577e-03, -1.645e-03, 3.705e-02, -5.532e-03, 4.438e-04) * s0_0_1;
	r0 += M4(5.677e-03, -2.132e-02, 4.381e-03, 3.103e-02, 1.940e-04, 3.724e-03, -1.168e-04, -2.697e-03, -3.001e-02, -7.031e-02, 2.111e-02, 6.155e-03, -3.733e-04, -2.736e-03, -1.413e-05, -1.431e-03) * s0_0_2;
	r0 += M4(-1.418e-02, 9.716e-03, -1.018e-03, 1.490e-02, -4.433e-02, 2.718e-03, -7.056e-02, -3.833e-03, -1.591e-03, -2.485e-04, -1.607e-02, -3.088e-03, -4.830e-01, 2.328e-01, 3.691e-01, 1.952e-02) * s0_1_0;
	r0 += M4(1.057e+00, 4.298e-02, -6.448e-01, -1.343e-01, 2.542e-01, 2.214e-01, 2.370e-01, -2.799e-01, 2.331e-02, -2.824e-02, -5.553e-02, -1.174e-02, 2.186e-02, -1.685e-01, -1.702e-02, 1.440e-01) * s0_1_1;
	r0 += M4(-1.265e-02, 3.654e-01, -3.506e-02, -1.267e-01, 1.053e-03, 1.061e-01, 5.046e-03, 1.213e-01, 1.606e-01, 1.877e-01, -1.028e-01, -1.076e-01, -7.882e-04, -2.580e-03, -4.474e-04, -1.439e-03) * s0_1_2;
	r0 += M4(-1.542e-02, -4.031e-04, -6.525e-03, -2.250e-03, -3.230e-03, 1.566e-04, 3.961e-02, 4.338e-03, 7.764e-04, 5.295e-04, -7.335e-03, 8.814e-05, 2.546e-02, -2.311e-04, -4.908e-02, 5.959e-02) * s0_2_0;
	r0 += M4(-1.965e-02, -1.732e-02, 2.570e-01, -2.352e-02, -3.292e-03, -2.902e-03, 8.418e-02, 9.922e-02, -6.708e-02, -1.138e-03, -3.354e-02, -2.647e-02, 2.370e-03, 6.271e-03, 4.345e-03, 6.721e-04) * s0_2_1;
	r0 += M4(-1.872e-01, -2.730e-02, 4.286e-02, 8.333e-02, -2.086e-04, -8.880e-04, -8.076e-04, 2.815e-02, -1.657e-02, -1.066e-01, 1.353e-01, 8.888e-02, -3.005e-04, 2.559e-03, 4.856e-05, 6.482e-04) * s0_2_2;
	// Constant per-component bias.
	r0 += V4(1.718e-08, 2.879e-08, 5.555e-09, -1.238e-09);
	// opt is half of LUMA_pt and fpos is the centre of output texel opos
	// expressed in those units.
	// NOTE(review): this presumably yields normalized LUMA coordinates for
	// the 2x output grid - confirm against mpv's definition of NAME_pt.
	vec2 opt = 0.5 * LUMA_pt;
	vec2 fpos = (vec2(opos) + vec2(0.5)) * opt;
	// Scatter r0.x/y/z/w to the 2x2 output block at offsets (0,0), (1,0),
	// (0,1), (1,1). Each value is added to the red channel of LUMA sampled at
	// the corresponding output position; only the first component carries
	// data, the rest are written as 0, 0, 1.
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0.x + LUMA_tex(fpos + vec2(0.0, 0.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r0.y + LUMA_tex(fpos + vec2(1.0, 0.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(0, 1), vec4(r0.z + LUMA_tex(fpos + vec2(0.0, 1.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(1, 1), vec4(r0.w + LUMA_tex(fpos + vec2(1.0, 1.0) * opt).r, 0.0, 0.0, 1.0));
}
